#### Supplementary Material for "Holding Ground...."
## Geographic Regression Discontinuity
## Authors: Kathryn Baragwanath, Ella Bayi, and Guilherme N. Fasolin
##### Date: November 17, 2025

## Loading Packages
library(tidyverse)
library(data.table)
library(lubridate)
library(rdrobust)
library(fixest)
library(vtable)   
library(ggplot2) 
library(knitr)

rm(list = ls())
gc()

#Set working directory here
wd <- ""

### Load PAs data -------------------
df_pas <- readRDS(file.path(wd, "df_pas.rds"))

#------------------------------------------------------------------------------#
### Estimation IT - Geographic RDDs - Full Sample ----- Figure 3, Table S7 - Top
#-------------------------------------------------------------------------------
  
# Adjust distance variable: negative outside IT, positive inside
df_pas$near_it <- ifelse(df_pas$grid_is_it == 0, df_pas$near_it*(-1), df_pas$near_it)

## IT measured every defined period using a 12k bandwidth
p <- df_pas %>%
  filter(near_it >= -12000 & near_it <= 12000) %>%
  group_by(near_it_FID, year)

p <- p %>% 
  mutate(
    year = case_when(
      year %in% 2004:2007 ~ 2004,
      year %in% 2008:2011 ~ 2008,
      year %in% 2012:2018 ~ 2012,
      year %in% 2019:2022 ~ 2019,
      TRUE ~ year
    )
  )

p <- p %>% 
  mutate(
    year3 = case_when(
      year %in% 2004:2007 ~ 1,
      year %in% 2008:2011 ~ 2,
      year %in% 2012:2018 ~ 3,
      year %in% 2019:2022 ~ 4,
      TRUE ~ NA_real_
    )
  )

# Initialize list to store RDD models
mod <- list()

# Loop over years
for(i in 1:4){
  p_i <- subset(p, year3 == i)
  
  # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  p_i <- subset(p_i, !is.na(near_it_hom_year) & year >= near_it_hom_year)
  
  # Exclude other protected areas (SP and SU) from analysis
  p_i$deforestation_pct <- ifelse(p_i$grid_is_pa == 1, NA, p_i$deforestation_pct)
  
  # Outcome and running variable
  Y <- p_i$deforestation_pct
  X <- p_i$near_it
  
  # Run local linear RDD with triangular kernel and nearest-neighbor clustered SE
  mod[[i]] <- rdrobust(
    Y, X, c = 0, kernel = "triangular", p = 1,
    bwselect = "mserd", vce = "hc3", cluster = p_i$near_it_FID
  )
}

# Initialize an empty list to store results from each year
dat_rdd <- list()

# Loop over years
for(i in 1:4){
  # Extract coefficients and standard errors from the rdrobust model for year i
  coef_i <- mod[[i]]$coef
  se_i <- mod[[i]]$se
  
  # Create a data frame for this year with coefficients, standard errors, and year index
  dat_i <- data.frame(coef_i, se_i, i)
  
  # Assign type labels (Conventional, Bias-corrected, Robust) for each coefficient
  dat_i$type <- c("Conventional", "Bias", "Robust")
  
  # Store this year's data in the list
  dat_rdd[[i]] <- dat_i
}

# Combine all yearly data frames into a single data table
data_rdd <- rbindlist(dat_rdd, use.names = TRUE)

# Keep only the robust estimates for plotting
data_rdd <- subset(data_rdd, type == "Robust")

# Convert to a data frame for ggplot
plot <- data.frame(data_rdd, stringsAsFactors = FALSE)

# Rename columns for clarity
names(plot) <- c("Estimate", "S.E.", "Year3", "type")

# Convert Estimate and S.E. to numeric
plot$Estimate <- as.numeric(as.character(plot$Estimate))
plot$S.E. <- as.numeric(as.character(plot$S.E.))

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Store the original year index and adjust year to actual calendar year
plot$val <- plot$Year
plot$Year <- plot$Year + 2003

# Ensure year is treated as a factor for plotting
plot$Year <- factor(plot$Year, levels = plot$Year[order(plot$val)])

# Additional column for plotting aesthetics (not used in this snippet)
plot$main <- c(rep("D", 4))

# Define a custom theme for plots
mytheme <- theme(
  axis.line = element_line(size = 1, colour = "black"),
  panel.background = element_rect(fill = "white")
)

# Create a labeled factor for year periods.
plot$PeriodGroupLabel <- factor(
  plot$Year3,          # the period index: 1,2,3,4
  levels = 1:4,
  labels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")
)

plot$PeriodGroup <- factor(plot$PeriodGroup,
                           levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022"))

# Data frame for all four periods for highlighting
highlight_periods <- data.frame(
  PeriodGroup = factor(c("2004–2007", "2008–2011", "2012–2018", "2019–2022"),
                       levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")),
  ymin = -2,
  ymax = 0.5,
  fill_color = c("white", "white", "lightgray", "lightgray"),  # alternating colors
  text = c("PPCDAM I", "PPCDAM II", "Forest Code", "Bolsonaro Adm")  # text inside tiles
)

# Create ggplot
plot1_it <- ggplot(plot) +
  
  # Highlight periods with alternating colors
  geom_tile(data = highlight_periods,
            aes(x = PeriodGroup, y = (ymin + ymax)/2, height = ymax - ymin, width = 0.9, fill = fill_color),
            alpha = 0.4, inherit.aes = FALSE) +
  
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, color = "black", size = 1) +
  
  # 90% CI (thicker)
  geom_segment(aes(x = PeriodGroup, y = lower1, xend = PeriodGroup, yend = upper1, color = PeriodGroup),
               size = 2) +   # thicker
  
  # 95% CI
  geom_segment(aes(x = PeriodGroup, y = lower, xend = PeriodGroup, yend = upper, color = PeriodGroup),
               size = 1.2) +   # thicker
  
  # Points (thicker)
  geom_point(aes(x = PeriodGroup, y = Estimate, color = PeriodGroup), size = 5) +  # bigger point
  
  # Text inside tiles
  geom_text(data = highlight_periods,
            aes(x = PeriodGroup, y = 0.4, label = text),
            color = "black", size = 5, inherit.aes = FALSE) +
  
  # Custom colors for points/segments
  scale_color_manual(
    values = c(
      "2004–2007" = "#4C78A8",
      "2008–2011" = "#4C78A8",   # brownish
      "2012–2018"   = "#F27A7A",   # blue
      "2019–2022"       = "#990000"    # purple
    ),
    name = "Period"
  ) +
  
  # Use fill_color for tiles but do not show legend
  scale_fill_identity() +
  
  # Theme: white background
  theme_minimal(base_size = 18) +
  theme(
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 18),
    axis.text.y = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "none"
  ) +
  
  # Labels
  ylab("RDD Estimate") +
  
  # Y-axis limits and breaks
  scale_y_continuous(limits = c(-2, 0.5), breaks = seq(-2.5, 0.5, 0.5))

# Display the plot
plot1_it

## Display the coefficients in a tabular format - Table S7 - Panel A
# Compute p-values
plot$p_value <- 2 * (1 - pnorm(abs(plot$Estimate / plot$S.E.)))

# Add conventional significance stars
plot$significance <- cut(
  plot$p_value,
  breaks = c(-Inf, 0.001, 0.05, 0.1, Inf),
  labels = c("***", "**", "*", ""),
  right = FALSE
)

plot$Estimate_display <- paste0(sprintf("%.3f", plot$Estimate), plot$significance)

table_S7 <- plot[, c("Year3", "Estimate_display", "S.E.")]
colnames(table_S7) <- c("Year3", "Estimate", "Std. Error")

# Display
table_S7


#------------------------------------------------------------------------------#
### Estimation IT - Geographic RDDs - Arc of Deforestation ---- Figure 3, Table S7 - Top
#-----------------------------------------------------------------------------
## Every period using a 12k bandwidth, restricted to states in Arc-Region
p <- df_pas %>%
  filter(near_it >= -12000 & near_it <= 12000) %>%
  filter(sigla_uf %in% c("PA", "MT", "RO", "AC")) %>%
  group_by(near_it_FID, year)

p <- p %>% 
  mutate(
    year = case_when(
      year %in% 2004:2007 ~ 2004,
      year %in% 2008:2011 ~ 2008,
      year %in% 2012:2018 ~ 2012,
      year %in% 2019:2022 ~ 2019,
      TRUE ~ year
    )
  )

p <- p %>% 
  mutate(
    year3 = case_when(
      year %in% 2004:2007 ~ 1,
      year %in% 2008:2011 ~ 2,
      year %in% 2012:2018 ~ 3,
      year %in% 2019:2022 ~ 4,
      TRUE ~ NA_real_
    )
  )

# Initialize list to store RDD models
mod <- list()
# Loop over years
for(i in 1:4){
  p_i <- subset(p, year3 == i)
  
  # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  p_i <- subset(p_i, !is.na(near_it_hom_year) & year >= near_it_hom_year)
  
  # Exclude other protected areas (SP and SU) from analysis
  p_i$deforestation_pct <- ifelse(p_i$grid_is_pa == 1, NA, p_i$deforestation_pct)
  
  # Outcome and running variable
  Y <- p_i$deforestation_pct
  X <- p_i$near_it
  
  # Run local linear RDD with triangular kernel and nearest-neighbor clustered SE
  mod[[i]] <- rdrobust(
    Y, X, c = 0, kernel = "triangular", p = 1,
    bwselect = "mserd", vce = "hc3", cluster = p_i$near_it_FID
  )
}

# Initialize an empty list to store results from each year
dat_rdd <- list()

# Loop over 19 years (assumes mod list contains results for 19 years)
for(i in 1:4){
  # Extract coefficients and standard errors from the rdrobust model for year i
  coef_i <- mod[[i]]$coef
  se_i <- mod[[i]]$se
  
  # Create a data frame for this year with coefficients, standard errors, and year index
  dat_i <- data.frame(coef_i, se_i, i)
  
  # Assign type labels (Conventional, Bias-corrected, Robust) for each coefficient
  dat_i$type <- c("Conventional", "Bias", "Robust")
  
  # Store this year's data in the list
  dat_rdd[[i]] <- dat_i
}

# Combine all yearly data frames into a single data table
data_rdd <- rbindlist(dat_rdd, use.names = TRUE)

# Keep only the robust estimates for plotting
data_rdd <- subset(data_rdd, type == "Robust")

# Convert to a data frame for ggplot
plot <- data.frame(data_rdd, stringsAsFactors = FALSE)

# Rename columns for clarity
names(plot) <- c("Estimate", "S.E.", "Year3", "type")

# Convert Estimate and S.E. to numeric
plot$Estimate <- as.numeric(as.character(plot$Estimate))
plot$S.E. <- as.numeric(as.character(plot$S.E.))

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Store the original year index and adjust year to actual calendar year
plot$val <- plot$Year
plot$Year <- plot$Year + 2003

# Ensure year is treated as a factor for plotting
plot$Year <- factor(plot$Year, levels = plot$Year[order(plot$val)])

# Additional column for plotting aesthetics (not used in this snippet)
plot$main <- c(rep("D", 4))

# Define a custom theme for plots
mytheme <- theme(
  axis.line = element_line(size = 1, colour = "black"),
  panel.background = element_rect(fill = "white")
)

# Suppose your plot data has one row per period (year3)
plot$PeriodGroupLabel <- factor(
  plot$Year3,          # the period index: 1,2,3,4
  levels = 1:4,
  labels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")
)

# Ensure PeriodGroup is a factor in the desired order
plot$PeriodGroup <- factor(plot$PeriodGroup,
                           levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022"))

# Data frame for all four periods for highlighting
highlight_periods <- data.frame(
  PeriodGroup = factor(c("2004–2007", "2008–2011", "2012–2018", "2019–2022"),
                       levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")),
  ymin = -2,
  ymax = 0.5,
  fill_color = c("white", "white", "lightgray", "lightgray"),  # alternating colors
  text = c("PPCDAM I", "PPCDAM II", "Forest Code", "Bolsonaro Adm")  # text inside tiles
)

# Plot
plot1_it_arc <- ggplot(plot) +
  
  # Highlight periods with alternating colors
  geom_tile(data = highlight_periods,
            aes(x = PeriodGroup, y = (ymin + ymax)/2, height = ymax - ymin, width = 0.9, fill = fill_color),
            alpha = 0.4, inherit.aes = FALSE) +
  
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, color = "black", size = 1) +
  
  # 90% CI (thicker)
  geom_segment(aes(x = PeriodGroup, y = lower1, xend = PeriodGroup, yend = upper1, color = PeriodGroup),
               size = 2) +   # thicker
  
  # 95% CI
  geom_segment(aes(x = PeriodGroup, y = lower, xend = PeriodGroup, yend = upper, color = PeriodGroup),
               size = 1.2) +   # thicker
  
  # Points (thicker)
  geom_point(aes(x = PeriodGroup, y = Estimate, color = PeriodGroup), size = 5) +  # bigger point
  
  # Text inside tiles
  geom_text(data = highlight_periods,
            aes(x = PeriodGroup, y = 0.4, label = text),
            color = "black", size = 5, inherit.aes = FALSE) +
  
  # Custom colors for points/segments
  scale_color_manual(
    values = c(
      "2004–2007" = "#4C78A8",
      "2008–2011" = "#4C78A8",   # brownish
      "2012–2018"   = "#F27A7A",   # blue
      "2019–2022"       = "#990000"    # purple
    ),
    name = "Period"
  ) +
  
  # Use fill_color for tiles but do not show legend
  scale_fill_identity() +
  
  # Theme: white background
  theme_minimal(base_size = 18) +
  theme(
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 18),
    axis.text.y = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "none"
  ) +
  
  # Labels
  ylab("RDD Estimate") +
  
  # Y-axis limits and breaks
  scale_y_continuous(limits = c(-2, 0.6), breaks = seq(-2.5, 0.5, 0.5))

# Display the plot
plot1_it_arc

## Display the coefficients in a tabular format - Table S7 - Panel A
# Compute p-values
plot$p_value <- 2 * (1 - pnorm(abs(plot$Estimate / plot$S.E.)))

# Add conventional significance stars
plot$significance <- cut(
  plot$p_value,
  breaks = c(-Inf, 0.001, 0.05, 0.1, Inf),
  labels = c("***", "**", "*", ""),
  right = FALSE
)

plot$Estimate_display <- paste0(sprintf("%.3f", plot$Estimate), plot$significance)

table_S7 <- plot[, c("Year3", "Estimate_display", "S.E.")]
colnames(table_S7) <- c("Year3", "Estimate", "Std. Error")

# Display
table_S7

#------------------------------------------------------------------------------#
### Estimation IT - Geographic RDDs - Non-Arc of Deforestation ----- Figure 3, Table S7 - Top
#-----------------------------------------------------------------------------
## Every period using a 12k bandwidth, restricted to states in Non-Arc-Region
p <- df_pas %>%
  filter(near_it >= -12000 & near_it <= 12000) %>%
  filter(!sigla_uf %in% c("PA", "MT", "RO", "AC")) %>%
  group_by(near_it_FID, year)

p <- p %>% 
  mutate(
    year = case_when(
      year %in% 2004:2007 ~ 2004,
      year %in% 2008:2011 ~ 2008,
      year %in% 2012:2018 ~ 2012,
      year %in% 2019:2022 ~ 2019,
      TRUE ~ year
    )
  )

p <- p %>% 
  mutate(
    year3 = case_when(
      year %in% 2004:2007 ~ 1,
      year %in% 2008:2011 ~ 2,
      year %in% 2012:2018 ~ 3,
      year %in% 2019:2022 ~ 4,
      TRUE ~ NA_real_
    )
  )

# Initialize list to store RDD models
mod <- list()
# Loop over years
for(i in 1:4){
  p_i <- subset(p, year3 == i)
  
  # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  p_i <- subset(p_i, !is.na(near_it_hom_year) & year >= near_it_hom_year)
  
  # Exclude other protected areas (SP and SU) from analysis
  p_i$deforestation_pct <- ifelse(p_i$grid_is_pa == 1, NA, p_i$deforestation_pct)
  
  # Outcome and running variable
  Y <- p_i$deforestation_pct
  X <- p_i$near_it
  
  # Run local linear RDD with triangular kernel and nearest-neighbor clustered SE
  mod[[i]] <- rdrobust(
    Y, X, c = 0, kernel = "triangular", p = 1,
    bwselect = "mserd", vce = "hc3", cluster = p_i$near_it_FID
  )
}

# Initialize an empty list to store results from each year
dat_rdd <- list()

# Loop over 19 years (assumes mod list contains results for 19 years)
for(i in 1:4){
  # Extract coefficients and standard errors from the rdrobust model for year i
  coef_i <- mod[[i]]$coef
  se_i <- mod[[i]]$se
  
  # Create a data frame for this year with coefficients, standard errors, and year index
  dat_i <- data.frame(coef_i, se_i, i)
  
  # Assign type labels (Conventional, Bias-corrected, Robust) for each coefficient
  dat_i$type <- c("Conventional", "Bias", "Robust")
  
  # Store this year's data in the list
  dat_rdd[[i]] <- dat_i
}

# Combine all yearly data frames into a single data table
data_rdd <- rbindlist(dat_rdd, use.names = TRUE)

# Keep only the robust estimates for plotting
data_rdd <- subset(data_rdd, type == "Robust")

# Convert to a data frame for ggplot
plot <- data.frame(data_rdd, stringsAsFactors = FALSE)

# Rename columns for clarity
names(plot) <- c("Estimate", "S.E.", "Year3", "type")

# Convert Estimate and S.E. to numeric
plot$Estimate <- as.numeric(as.character(plot$Estimate))
plot$S.E. <- as.numeric(as.character(plot$S.E.))

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Store the original year index and adjust year to actual calendar year
plot$val <- plot$Year
plot$Year <- plot$Year + 2003

# Ensure year is treated as a factor for plotting
plot$Year <- factor(plot$Year, levels = plot$Year[order(plot$val)])

# Additional column for plotting aesthetics (not used in this snippet)
plot$main <- c(rep("D", 4))

# Define a custom theme for plots
mytheme <- theme(
  axis.line = element_line(size = 1, colour = "black"),
  panel.background = element_rect(fill = "white")
)

# Suppose your plot data has one row per period (year3)
plot$PeriodGroupLabel <- factor(
  plot$Year3,          # the period index: 1,2,3,4
  levels = 1:4,
  labels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")
)

# Ensure PeriodGroup is a factor in the desired order
plot$PeriodGroup <- factor(plot$PeriodGroup,
                           levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022"))

# Data frame for all four periods for highlighting
highlight_periods <- data.frame(
  PeriodGroup = factor(c("2004–2007", "2008–2011", "2012–2018", "2019–2022"),
                       levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")),
  ymin = -2,
  ymax = 0.5,
  fill_color = c("white", "white", "lightgray", "lightgray"),  # alternating colors
  text = c("PPCDAM I", "PPCDAM II", "Forest Code", "Bolsonaro Adm")  # text inside tiles
)

# Plot
plot1_it_non_arc <- ggplot(plot) +
  
  # Highlight periods with alternating colors
  geom_tile(data = highlight_periods,
            aes(x = PeriodGroup, y = (ymin + ymax)/2, height = ymax - ymin, width = 0.9, fill = fill_color),
            alpha = 0.4, inherit.aes = FALSE) +
  
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, color = "black", size = 1) +
  
  # 90% CI (thicker)
  geom_segment(aes(x = PeriodGroup, y = lower1, xend = PeriodGroup, yend = upper1, color = PeriodGroup),
               size = 2) +   # thicker
  
  # 95% CI
  geom_segment(aes(x = PeriodGroup, y = lower, xend = PeriodGroup, yend = upper, color = PeriodGroup),
               size = 1.2) +   # thicker
  
  # Points (thicker)
  geom_point(aes(x = PeriodGroup, y = Estimate, color = PeriodGroup), size = 5) +  # bigger point
  
  # Text inside tiles
  geom_text(data = highlight_periods,
            aes(x = PeriodGroup, y = 0.4, label = text),
            color = "black", size = 5, inherit.aes = FALSE) +
  
  # Custom colors for points/segments
  scale_color_manual(
    values = c(
      "2004–2007" = "#4C78A8",
      "2008–2011" = "#4C78A8",   # brownish
      "2012–2018"   = "#F27A7A",   # blue
      "2019–2022"       = "#990000"    # purple
    ),
    name = "Period"
  ) +
  
  # Use fill_color for tiles but do not show legend
  scale_fill_identity() +
  
  # Theme: white background
  theme_minimal(base_size = 18) +
  theme(
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 18),
    axis.text.y = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "none"
  ) +
  
  # Labels
  ylab("RDD Estimate") +
  
  # Y-axis limits and breaks
  scale_y_continuous(limits = c(-2, 0.5), breaks = seq(-2.5, 0.5, 0.5))

# Display the plot
plot1_it_non_arc

## Display the coefficients in a tabular format - Table S7 - Panel A
# Compute p-values
plot$p_value <- 2 * (1 - pnorm(abs(plot$Estimate / plot$S.E.)))

# Add conventional significance stars
plot$significance <- cut(
  plot$p_value,
  breaks = c(-Inf, 0.001, 0.05, 0.1, Inf),
  labels = c("***", "**", "*", ""),
  right = FALSE
)

plot$Estimate_display <- paste0(sprintf("%.3f", plot$Estimate), plot$significance)

table_S7 <- plot[, c("Year3", "Estimate_display", "S.E.")]
colnames(table_S7) <- c("Year3", "Estimate", "Std. Error")

# Display
table_S7

#------------------------------------------------------------------------------#
# Comparing dynamic effects: Inside vs. Outside IT ---- Figure S10 - Top ----
#-------------------------------------------------------------------------------
######  Filtering and preparing raw data - Pre-Forest Code
p <- df_pas %>%
  # Keep only the years 2004–2011
  filter(year >= 2004 & year <= 2011) %>%
  # For Arc of Deforestation, also use: # filter(sigla_uf %in% c("PA", "MT", "RO", "AC"))
  # For Non-Arc of Deforestation, also use: filter(!sigla_uf %in% c("PA", "MT", "RO", "AC"))
  
  # Group by unique spatial unit (near_it_FID) and year
  group_by(near_it_FID, year) %>%
  
  #Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  subset(!is.na(near_it_hom_year) & year >= near_it_hom_year) %>%
  
  # Keep only observations within -12km to +12km from IT boundary
  filter(near_it >= -12000 & near_it <= 12000) %>%
  
  # Set deforestation_pct to NA for other protected areas (SP and SU)
  mutate(deforestation_pct = ifelse(grid_is_pa == 1, NA, deforestation_pct))

# Preparing binned data for plotting
p_binned <- p %>%
  # Creating 500-meter bins along 'near_it' distance
  mutate(bin = cut(near_it, breaks = seq(-12000, 12000, by = 500), include.lowest = TRUE)) %>%
  
  # Aggregating by bin, year, and whether grid is inside IT
  group_by(bin, year, grid_is_it) %>%
  summarise(
    near_it = mean(near_it, na.rm = TRUE),          # Average distance per bin
    deforestation_pct = mean(deforestation_pct, na.rm = TRUE),  # Average deforestation per bin
    .groups = "drop"
  ) %>%
  
  # Computing midpoint of each bin for plotting
  mutate(
    bin_midpoint = as.numeric(sub("\\[\\s*(-?\\d+),.*", "\\1", as.character(bin))) + 250
  )

# Filtering out NAs to prevent warnings in ggplot
p_binned_plot <- p_binned %>%
  filter(!is.na(deforestation_pct) & !is.na(bin_midpoint))

# Creating the plot
av_def_it_pre <- ggplot() + 

  # Local polynomial regression (inside IT) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_it == 1), 
    aes(x = near_it, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Local polynomial regression (outside IT) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_it == 0), 
    aes(x = near_it, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Vertical line at IT boundary (x = 0)
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Horizontal line at 0 deforestation
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Color palette for years
  scale_color_manual(values = c(  
    "2004" = "#D0E1F2",  
    "2005" = "#D0E1F2",  
    "2006" = "#79A8D1",  
    "2007" = "#79A8D1",  
    "2008" = "#79A8D1",  
    "2009" = "#082A69",  
    "2010" = "#082A69",  
    "2011" = "#082A69"  
  )) +  
  
  # Labels
  labs(title = "",
       x = "",
       y = "",
       color = "Year") +  
  
  # Theme customization
  theme_minimal(base_size = 18) +  
  theme(
    legend.position = "none",
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 14),
    axis.title = element_text(size = 22, face = "bold"),
    axis.text = element_text(size = 25),
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, face = "italic", hjust = 0.5),
    panel.grid.major = element_line(color = "white", size = 0.5),
    panel.grid.minor = element_line(color = "white", size = 0.3),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.line = element_line(color = "black", size = 1)
  ) +  
  
  # Zoom in safely without dropping points
  coord_cartesian(
    xlim = c(-10000, 12000),  # in meters
    ylim = c(-0.1, 1.8)
  ) +  
  
  # Adjust axis scales
  scale_x_continuous(
    breaks = seq(-10000, 12000, by = 5000),  # every 5 km
    labels = scales::number_format(scale = 0.001),  # convert to km
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 2.5, by = 0.5),
    expand = c(0, 0.1)
  )

av_def_it_pre

###### Filtering and preparing raw data - Post-Forest Code - 2012-2022
p <- df_pas %>%
  # Keep only the years 2004–2011
  filter(year >= 2012 & year <= 2022) %>%
  # For Arc of Deforestation, also use: # filter(sigla_uf %in% c("PA", "MT", "RO", "AC"))
  # For Non-Arc of Deforestation, also use: filter(!sigla_uf %in% c("PA", "MT", "RO", "AC"))
  
  # Group by unique spatial unit (near_it_FID) and year
  group_by(near_it_FID, year) %>%
  
  #Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  subset(!is.na(near_it_hom_year) & year >= near_it_hom_year) %>%
  
  # Keep only observations within -12km to +12km from IT boundary
  filter(near_it >= -12000 & near_it <= 12000) %>%
  
  # Set deforestation_pct to NA for other protected areas (SP and SU)
  mutate(deforestation_pct = ifelse(grid_is_pa == 1, NA, deforestation_pct))

p_binned <- p %>%
  # Creating 500-meter bins along 'near_it' distance
  mutate(bin = cut(near_it, breaks = seq(-12000, 12000, by = 500), include.lowest = TRUE)) %>%
  
  # Aggregating by bin, year, and whether grid is inside IT
  group_by(bin, year, grid_is_it) %>%
  summarise(
    near_it = mean(near_it, na.rm = TRUE),          # Average distance per bin
    deforestation_pct = mean(deforestation_pct, na.rm = TRUE),  # Average deforestation per bin
    .groups = "drop"
  ) %>%
  
  # Computing midpoint of each bin for plotting
  mutate(
    bin_midpoint = as.numeric(sub("\\[\\s*(-?\\d+),.*", "\\1", as.character(bin))) + 250
  )

# Filtering out NAs to prevent warnings in ggplot
p_binned_plot <- p_binned %>%
  filter(!is.na(deforestation_pct) & !is.na(bin_midpoint))

# Creating the plot
av_def_it_post <- ggplot() + 
  # Local polynomial regression (inside IT) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_it == 1), 
    aes(x = near_it, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Local polynomial regression (outside IT) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_it == 0), 
    aes(x = near_it, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Vertical line at IT boundary (x = 0)
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Horizontal line at 0 deforestation
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Color palette for years
  scale_color_manual(values = c(  
    "2012" = "#FAD2D2",
    "2013" = "#FAD2D2",
    "2014" = "#FAD2D2",
    "2015" = "#F27A7A",
    "2016" = "#F27A7A",
    "2017" = "#F27A7A",
    "2018" = "#F26565",
    "2019" = "#B30000",
    "2020" = "#B30000",
    "2021" = "#800000",
    "2022" = "#800000" 
  )) +  

  # Labels
  labs(title = "",
       x = "",
       y = "",
       color = "Year") +  
  
  # Theme customization
  theme_minimal(base_size = 18) +  
  theme(
    legend.position = "none",
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 14),
    axis.title = element_text(size = 22, face = "bold"),
    axis.text = element_text(size = 25),
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, face = "italic", hjust = 0.5),
    panel.grid.major = element_line(color = "white", size = 0.5),
    panel.grid.minor = element_line(color = "white", size = 0.3),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.line = element_line(color = "black", size = 1)
  ) +  
  
  # Zoom in safely without dropping points
  coord_cartesian(
    xlim = c(-10000, 12000),  # in meters
    ylim = c(-0.1, 1.8)
  ) +  
  
  # Adjust axis scales
  scale_x_continuous(
    breaks = seq(-10000, 12000, by = 5000),  # every 5 km
    labels = scales::number_format(scale = 0.001),  # convert to km
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 2.5, by = 0.5),
    expand = c(0, 0.1)
  )

av_def_it_post


#------------------------------------------------------------------------------#
### Robustness Check 1 - Different Bandwidths ---- Figure S11 ----
#-------------------------------------------------------------------------------
bandwidths <- c(3000, 6000, 9000, 12000, 15000, 18000, 20000)

# Preparing the data
p <- df_pas %>%
  filter(near_it >= -20000 & near_it <= 20000) %>%
  group_by(near_it_FID, year)

# Initialize list to store RDD models
mod_list_all <- list()

# Loop over years
for (bw in bandwidths) {
  mod_list <- list()
  
  for (i in 2004:2022) {
    p_i <- subset(p, year == i)
    # Remove grids that are inside any IT that was homologated after the current year (future ITs)
    p_i <- subset(p_i, !is.na(near_it_hom_year) & year >= near_it_hom_year)
    
    # Exclude other protected areas from analysis
    p_i$deforestation_pct <- ifelse(p_i$grid_is_pa == 1, NA, p_i$deforestation_pct)
    
    # Outcome and running variable
    Y <- p_i$deforestation_pct
    X <- p_i$near_it
    
    valid <- !is.na(Y) & !is.na(X)
    Y <- Y[valid]
    X <- X[valid]
    cluster <- p_i$near_it_FID[valid]
    
    # Only run if both sides of cutoff have variation
    if (length(unique(X[X < 0])) > 1 & length(unique(X[X > 0])) > 1) {
      mod_list[[as.character(i)]] <- rdrobust(
        Y, X, c = 0, kernel = "triangular", p = 1,
        h = bw, vce = "nn", cluster = cluster
      )
    } else {
      mod_list[[as.character(i)]] <- NULL
    }
  }
  mod_list_all[[as.character(bw)]] <- mod_list
}

# Initialize an empty list to store results from each year
dat_rdd_all <- list()

for (bw in names(mod_list_all)) {
  mod_list <- mod_list_all[[bw]]
  dat_rdd <- list()
  
  for (year in names(mod_list)) {
    mod <- mod_list[[year]]
    if (!is.null(mod)) {
      coef_i <- mod$coef
      se_i <- mod$se
      dat_i <- data.frame(
        Estimate = coef_i[3],  # Robust estimate
        S.E. = se_i[3],        # Robust standard error
        Year = as.numeric(year),
        Bandwidth = as.numeric(bw)
      )
      dat_rdd[[year]] <- dat_i
    }
  }
  dat_rdd_all[[bw]] <- do.call(rbind, dat_rdd)
}

# Combine all into one dataframe
plot <- do.call(rbind, dat_rdd_all)

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Make Year a factor if needed for plotting
plot$Year <- factor(plot$Year, levels = sort(unique(plot$Year)))

plot_bw_it <- ggplot(plot, aes(x = Year, y = Estimate, color = as.factor(Bandwidth))) + 
  geom_hline(yintercept = 0, color = "black", size = 1) + 
  geom_vline(xintercept = c("2012", "2019"), color = "black", linetype = "dashed", size = 1) + 
  geom_point(size = 2, position = position_dodge(width = 0.5)) + 
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, size = 0.8,
                position = position_dodge(width = 0.5)) + 
  geom_errorbar(aes(ymin = lower1, ymax = upper1), width = 0.2, size = 1.5, alpha = 0.6,
                position = position_dodge(width = 0.5)) + 
  scale_y_continuous(
    breaks = seq(-2.5, 0.5, by = 0.5),
    limits = c(-2.5, 0.5)
  ) + 
  scale_x_discrete(drop = FALSE) + 
  labs(
    x = "Year",
    y = "RDD Estimate",
    title = "",
    color = "Bandwidth"
  ) + 
  theme_minimal() + 
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 18),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.position = "bottom"
  ) + 
  annotate("text", x = "2012", y = -1, label = "Forest Code", angle = 90, vjust = -0.5, size = 5) +
  annotate("text", x = "2019", y = -1, label = "Bolsonaro Elected", angle = 90, vjust = -0.5, size = 5)

print(plot_bw_it)

#------------------------------------------------------------------------------#
### Robustness Check 2 - Balance Checks ---- Figure S14 ----
#-------------------------------------------------------------------------------
# Only keep grid cells within ±12 km of the IT border (homologated)
balance_data <- subset(
    df_pas,
    near_it >= -12000 & near_it <= 12000 &   
      !is.na(near_it_hom_year) &             
      year >= near_it_hom_year)

# List of covariates to plot with their labels
vars_to_plot <- list(
  elevation = "Elevation",
  hillshade = "Hillshade",
  slope = "Slope",
  near_rivers = "Rivers",
  near_roads_05 = "Roads",
  near_cities_FID = "Cities"
)

# Generating plots
plot_rdd <- function(var_name, y_label) {
  # Replace values inside other PAs with NA
  balance_data[[var_name]] <- ifelse(balance_data$grid_is_pa == 1, NA, balance_data[[var_name]])
  
  # Generate rdplot
  p <- rdplot(balance_data[[var_name]], balance_data$near_it,
              c = 0, p = 1, kernel = "tri", nbins = 300,
              x.label = "Distance to IT Border (m)",
              y.label = NULL,
              title = y_label)$rdplot
  
  # Apply a larger, publication-quality theme
  p + theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_blank(),
    axis.text.x  = element_text(size = 12),
    axis.text.y  = element_text(size = 12),
    plot.title   = element_text(size = 12, face = "bold", hjust = 0.5),
    plot.margin  = margin(t = 10, r = 15, b = 10, l = 15)
  )
}

# Loop through variables and generate plots
plots <- lapply(names(vars_to_plot), function(v) plot_rdd(v, vars_to_plot[[v]]))

# Combining all plots into one figure
combined_plot_it <- do.call(grid.arrange, c(plots, ncol = 2))


#------------------------------------------------------------------------------#
### Robustness Check 3 - Placebo Threshold Tests ---- Figure S17 - Top ----
#-------------------------------------------------------------------------------
# Preparing the data
p <- df_pas %>%
  filter(near_it >= -12000 & near_it <= 12000) %>%
  group_by(near_it_FID, year)

# Placebo offsets in meters
placebo_offsets <- c(-5000, -2000, 2000, 5000)

dat_rdd_all <- list()
mod_placebo <- list()

for(offset in placebo_offsets){
  mod_list <- list()
  
  for(i in 2004:2022){
    p_i <- subset(p, year == i)
    
    # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
    p_i <- subset(p_i, !is.na(near_it_hom_year) & year >= near_it_hom_year)
    
    # Exclude other protected areas (SP and SU) from analysis
    p_i$deforestation_pct <- ifelse(p_i$grid_is_pa == 1, NA, p_i$deforestation_pct)
    
    Y <- p_i$deforestation_pct
    X <- p_i$near_it
    
    mod_list[[i - 2003]] <- rdrobust(
      Y, X, c = offset, kernel = "triangular", p = 1,
      bwselect = "mserd", vce = "nn", cluster = p_i$near_it_FID
    )
  }
  
  # Collect results
  dat_rdd <- list()
  for(j in 1:length(mod_list)){
    coef_i <- mod_list[[j]]$coef[1]
    se_i   <- mod_list[[j]]$se[1]
    dat_rdd[[j]] <- data.frame(
      Estimate = coef_i,
      S.E = se_i,
      Year = 2003 + j,
      Bandwidth = paste0(ifelse(offset>0,"+", ""), offset/1000, "km")
      
    )
  }
  
  dat_rdd_all[[paste0(offset)]] <- rbindlist(dat_rdd)
}

plot <- do.call(rbind, dat_rdd_all)

# Compute confidence intervals
plot$lower  <- plot$Estimate - 1.96 * plot$S.E
plot$upper  <- plot$Estimate + 1.96 * plot$S.E
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E

# Make Year a factor
plot$Year <- factor(plot$Year, levels = sort(unique(plot$Year)))

# Plot
plot_bw_placebo_it <- ggplot(plot, aes(x = Year, y = Estimate, color = Bandwidth)) +
  geom_hline(yintercept = 0, color = "black", size = 1) +
  geom_vline(xintercept = c("2012", "2019"), color = "black", linetype = "dashed", size = 1) +
  geom_point(size = 2, position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, size = 0.8,
                position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = lower1, ymax = upper1), width = 0.2, size = 1.5, alpha = 0.6,
                position = position_dodge(width = 0.5)) +
  scale_y_continuous(
    breaks = seq(-2.5, 1.5, by = 0.5),
    limits = c(-2.5, 1.5)
  ) +
  scale_x_discrete(drop = FALSE) +
  labs(
    x = "Year",
    y = "RDD Estimate",
    color = "Threshold Distance"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 18),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.position = "bottom"
  ) +
  annotate("text", x = "2012", y = -1.5, label = "Forest Code", angle = 90, vjust = -0.5, size = 4) +
  annotate("text", x = "2019", y = -1.5, label = "Bolsonaro Elected", angle = 90, vjust = -0.5, size = 4)

print(plot_bw_placebo_it)

#------------------------------------------------------------------------------#
### Estimation SP - Geographic RDDs - Full Sample ----- Figure 3, Table S7 - Middle
#-----------------------------------------------------------------------------
# Create 'near_sp' containing 'near_pa' values only for SP areas; others set to NA
df_pas$near_sp <- ifelse(df_pas$near_pa_type == "Proteção Integral", df_pas$near_pa, 
                           as.numeric(NA))

## Keep only the federal-level protected areas 
df_pas <- df_pas %>% filter(near_pa_gov == "Federal")

# Adjust distance variable: negative outside IT, positive inside
df_pas$near_sp <- ifelse(df_pas$grid_is_sp == 0, df_pas$near_sp*(-1), df_pas$near_sp)

p <- df_pas %>%
  filter(near_sp >= -12000 & near_sp <= 12000) %>%
  group_by(near_pa_FID, year)

p <- p %>% 
  mutate(
    year = case_when(
      year %in% 2004:2007 ~ 2004,
      year %in% 2008:2011 ~ 2008,
      year %in% 2012:2018 ~ 2012,
      year %in% 2019:2022 ~ 2019,
      TRUE ~ year
    )
  )

p <- p %>% 
  mutate(
    year3 = case_when(
      year %in% 2004:2007 ~ 1,
      year %in% 2008:2011 ~ 2,
      year %in% 2012:2018 ~ 3,
      year %in% 2019:2022 ~ 4,
      TRUE ~ NA_real_
    )
  )

# Initialize list to store RDD models
mod <- list()
# Loop over years
for(i in 1:4){
  p_i <- subset(p, year3 == i)
  
  # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
  
  # Exclude other protected areas (SP and SU) from analysis
  p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_su == 1, NA, p_i$deforestation_pct)
  
  # Outcome and running variable
  Y <- p_i$deforestation_pct
  X <- p_i$near_sp
  
  
  # Run local linear RDD with triangular kernel and nearest-neighbor clustered SE
  mod[[i]] <- rdrobust(
    Y, X, c = 0, kernel = "triangular", p = 1,
    bwselect = "mserd", vce = "hc3", cluster = p_i$near_pa_FID
  )
}

# Initialize an empty list to store results from each year
dat_rdd <- list()

# Loop over 19 years (assumes mod list contains results for 19 years)
for(i in 1:4){
  # Extract coefficients and standard errors from the rdrobust model for year i
  coef_i <- mod[[i]]$coef
  se_i <- mod[[i]]$se
  
  # Create a data frame for this year with coefficients, standard errors, and year index
  dat_i <- data.frame(coef_i, se_i, i)
  
  # Assign type labels (Conventional, Bias-corrected, Robust) for each coefficient
  dat_i$type <- c("Conventional", "Bias", "Robust")
  
  # Store this year's data in the list
  dat_rdd[[i]] <- dat_i
}

# Combine all yearly data frames into a single data table
data_rdd <- rbindlist(dat_rdd, use.names = TRUE)

# Keep only the robust estimates for plotting
data_rdd <- subset(data_rdd, type == "Robust")

# Convert to a data frame for ggplot
plot <- data.frame(data_rdd, stringsAsFactors = FALSE)

# Rename columns for clarity
names(plot) <- c("Estimate", "S.E.", "Year3", "type")

# Convert Estimate and S.E. to numeric
plot$Estimate <- as.numeric(as.character(plot$Estimate))
plot$S.E. <- as.numeric(as.character(plot$S.E.))

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Store the original year index and adjust year to actual calendar year
plot$val <- plot$Year
plot$Year <- plot$Year + 2003

# Ensure year is treated as a factor for plotting
plot$Year <- factor(plot$Year, levels = plot$Year[order(plot$val)])

# Additional column for plotting aesthetics (not used in this snippet)
plot$main <- c(rep("D", 4))

# Define a custom theme for plots
mytheme <- theme(
  axis.line = element_line(size = 1, colour = "black"),
  panel.background = element_rect(fill = "white")
)

# Suppose your plot data has one row per period (year3)
plot$PeriodGroupLabel <- factor(
  plot$Year3,          # the period index: 1,2,3,4
  levels = 1:4,
  labels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")
)

# Ensure PeriodGroup is a factor in the desired order
plot$PeriodGroup <- factor(plot$PeriodGroup,
                           levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022"))

# Data frame for all four periods for highlighting
highlight_periods <- data.frame(
  PeriodGroup = factor(c("2004–2007", "2008–2011", "2012–2018", "2019–2022"),
                       levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")),
  ymin = -2,
  ymax = 0.5,
  fill_color = c("white", "white", "lightgray", "lightgray"),  # alternating colors
  text = c("PPCDAM I", "PPCDAM II", "Forest Code", "Bolsonaro Adm")  # text inside tiles
)

# Plot
plot1_sp <- ggplot(plot) +
  
  # Highlight periods with alternating colors
  geom_tile(data = highlight_periods,
            aes(x = PeriodGroup, y = (ymin + ymax)/2, height = ymax - ymin, width = 0.9, fill = fill_color),
            alpha = 0.4, inherit.aes = FALSE) +
  
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, color = "black", size = 1) +
  
  # 90% CI (thicker)
  geom_segment(aes(x = PeriodGroup, y = lower1, xend = PeriodGroup, yend = upper1, color = PeriodGroup),
               size = 2) +   # thicker
  
  # 95% CI
  geom_segment(aes(x = PeriodGroup, y = lower, xend = PeriodGroup, yend = upper, color = PeriodGroup),
               size = 1.2) +   # thicker
  
  # Points (thicker)
  geom_point(aes(x = PeriodGroup, y = Estimate, color = PeriodGroup), size = 5) +  # bigger point
  
  # Text inside tiles
  # geom_text(data = highlight_periods,
  # aes(x = PeriodGroup, y = 0.4, label = text),
  # color = "black", size = 5, inherit.aes = FALSE)
  
  # Custom colors for points/segments
  
  scale_color_manual(
    values = c(
      "2004–2007" = "#4C78A8",
      "2008–2011" = "#4C78A8",   # brownish
      "2012–2018"   = "#F27A7A",   # blue
      "2019–2022"       = "#990000"    # purple
    ),
    name = "Period"
  ) +
  
  # Use fill_color for tiles but do not show legend
  scale_fill_identity() +
  
  # Theme: white background
  theme_minimal(base_size = 18) +
  theme(
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 18),
    axis.text.y = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "none"
  ) +
  
  # Labels
  ylab("RDD Estimate") +
  
  # Y-axis limits and breaks
  scale_y_continuous(limits = c(-2, 0.5), breaks = seq(-2.5, 0.5, 0.5))

# Display the plot
plot1_sp

## Display the coefficients in a tabular format - Table S7 - Panel B
plot$p_value <- 2 * (1 - pnorm(abs(plot$Estimate / plot$S.E.)))

# Add conventional significance stars
plot$significance <- cut(
  plot$p_value,
  breaks = c(-Inf, 0.001, 0.05, 0.1, Inf),
  labels = c("***", "**", "*", ""),
  right = FALSE
)

plot$Estimate_display <- paste0(sprintf("%.3f", plot$Estimate), plot$significance)

table_S7 <- plot[, c("Year3", "Estimate_display", "S.E.")]
colnames(table_S7) <- c("Year3", "Estimate", "Std. Error")

# Display
table_S7

#------------------------------------------------------------------------------#
### Estimation SP - Geographic RDDs - Arc of Deforestation ---- Figure 3, Table S7 - Middle
#-----------------------------------------------------------------------------
## Every period using a 12k bandwidth, restricted to states in Arc-Region

p <- df_pas %>%
  filter(near_sp >= -12000 & near_sp <= 12000) %>%
  filter(sigla_uf %in% c("PA", "MT", "RO", "AC")) %>%
  group_by(near_pa_FID, year)

p <- p %>% 
  mutate(
    year = case_when(
      year %in% 2004:2007 ~ 2004,
      year %in% 2008:2011 ~ 2008,
      year %in% 2012:2018 ~ 2012,
      year %in% 2019:2022 ~ 2019,
      TRUE ~ year
    )
  )

p <- p %>% 
  mutate(
    year3 = case_when(
      year %in% 2004:2007 ~ 1,
      year %in% 2008:2011 ~ 2,
      year %in% 2012:2018 ~ 3,
      year %in% 2019:2022 ~ 4,
      TRUE ~ NA_real_
    )
  )

# Initialize list to store RDD models
mod <- list()
# Loop over years
for(i in 1:4){
  p_i <- subset(p, year3 == i)
  
  # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
  
  # Exclude other protected areas (SP and SU) from analysis
  p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_su == 1, NA, p_i$deforestation_pct)
  
  # Outcome and running variable
  Y <- p_i$deforestation_pct
  X <- p_i$near_sp
  
  # Run local linear RDD with triangular kernel and nearest-neighbor clustered SE
  mod[[i]] <- rdrobust(
    Y, X, c = 0, kernel = "triangular", p = 1,
    bwselect = "mserd", vce = "hc3", cluster = p_i$near_pa_FID
  )
}

# Initialize an empty list to store results from each year
dat_rdd <- list()

# Loop over 19 years (assumes mod list contains results for 19 years)
for(i in 1:4){
  # Extract coefficients and standard errors from the rdrobust model for year i
  coef_i <- mod[[i]]$coef
  se_i <- mod[[i]]$se
  
  # Create a data frame for this year with coefficients, standard errors, and year index
  dat_i <- data.frame(coef_i, se_i, i)
  
  # Assign type labels (Conventional, Bias-corrected, Robust) for each coefficient
  dat_i$type <- c("Conventional", "Bias", "Robust")
  
  # Store this year's data in the list
  dat_rdd[[i]] <- dat_i
}

# Combine all yearly data frames into a single data table
data_rdd <- rbindlist(dat_rdd, use.names = TRUE)

# Keep only the robust estimates for plotting
data_rdd <- subset(data_rdd, type == "Robust")

# Convert to a data frame for ggplot
plot <- data.frame(data_rdd, stringsAsFactors = FALSE)

# Rename columns for clarity
names(plot) <- c("Estimate", "S.E.", "Year3", "type")

# Convert Estimate and S.E. to numeric
plot$Estimate <- as.numeric(as.character(plot$Estimate))
plot$S.E. <- as.numeric(as.character(plot$S.E.))

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Store the original year index and adjust year to actual calendar year
plot$val <- plot$Year
plot$Year <- plot$Year + 2003

# Ensure year is treated as a factor for plotting
plot$Year <- factor(plot$Year, levels = plot$Year[order(plot$val)])

# Additional column for plotting aesthetics (not used in this snippet)
plot$main <- c(rep("D", 4))

# Define a custom theme for plots
mytheme <- theme(
  axis.line = element_line(size = 1, colour = "black"),
  panel.background = element_rect(fill = "white")
)

# Suppose your plot data has one row per period (year3)
plot$PeriodGroupLabel <- factor(
  plot$Year3,          # the period index: 1,2,3,4
  levels = 1:4,
  labels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")
)

# Ensure PeriodGroup is a factor in the desired order
plot$PeriodGroup <- factor(plot$PeriodGroup,
                           levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022"))

# Data frame for all four periods for highlighting
highlight_periods <- data.frame(
  PeriodGroup = factor(c("2004–2007", "2008–2011", "2012–2018", "2019–2022"),
                       levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")),
  ymin = -2,
  ymax = 0.5,
  fill_color = c("white", "white", "lightgray", "lightgray"),  # alternating colors
  text = c("PPCDAM I", "PPCDAM II", "Forest Code", "Bolsonaro Adm")  # text inside tiles
)

# Plot
plot1_sp_arc <- ggplot(plot) +
  
  # Highlight periods with alternating colors
  geom_tile(data = highlight_periods,
            aes(x = PeriodGroup, y = (ymin + ymax)/2, height = ymax - ymin, width = 0.9, fill = fill_color),
            alpha = 0.4, inherit.aes = FALSE) +
  
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, color = "black", size = 1) +
  
  # 90% CI (thicker)
  geom_segment(aes(x = PeriodGroup, y = lower1, xend = PeriodGroup, yend = upper1, color = PeriodGroup),
               size = 2) +   # thicker
  
  # 95% CI
  geom_segment(aes(x = PeriodGroup, y = lower, xend = PeriodGroup, yend = upper, color = PeriodGroup),
               size = 1.2) +   # thicker
  
  # Points (thicker)
  geom_point(aes(x = PeriodGroup, y = Estimate, color = PeriodGroup), size = 5) +  # bigger point
  
  # Text inside tiles
  # geom_text(data = highlight_periods,
  # aes(x = PeriodGroup, y = 0.4, label = text),
  # color = "black", size = 5, inherit.aes = FALSE)
  
  # Custom colors for points/segments
  scale_color_manual(
    values = c(
      "2004–2007" = "#4C78A8",
      "2008–2011" = "#4C78A8",   # brownish
      "2012–2018"   = "#F27A7A",   # blue
      "2019–2022"       = "#990000"    # purple
    ),
    name = "Period"
  ) +
  
  # Use fill_color for tiles but do not show legend
  scale_fill_identity() +
  
  # Theme: white background
  theme_minimal(base_size = 18) +
  theme(
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 18),
    axis.text.y = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "none"
  ) +
  
  # Labels
  ylab("RDD Estimate") +
  
  # Y-axis limits and breaks
  scale_y_continuous(limits = c(-2, 0.5), breaks = seq(-2.5, 0.5, 0.5))

# Display the plot
plot1_sp_arc

## Display the coefficients in a tabular format - Table S7 - Panel B
## Calculating P value
plot$p_value <- 2 * (1 - pnorm(abs(plot$Estimate / plot$S.E.)))

# Add conventional significance stars
plot$significance <- cut(
  plot$p_value,
  breaks = c(-Inf, 0.001, 0.05, 0.1, Inf),
  labels = c("***", "**", "*", ""),
  right = FALSE
)

plot$Estimate_display <- paste0(sprintf("%.3f", plot$Estimate), plot$significance)

table_S7 <- plot[, c("Year3", "Estimate_display", "S.E.")]
colnames(table_S7) <- c("Year3", "Estimate", "Std. Error")

# Display
table_S7

#------------------------------------------------------------------------------#
### Estimation SP - Geographic RDDs - Non-Arc of Deforestation ---- Figure 3, Table S7 - Middle
#-----------------------------------------------------------------------------
## Every period using a 12k bandwidth, restricted to states in Non-Arc-Region
p <- df_pas %>%
  filter(near_sp >= -12000 & near_sp <= 12000) %>%
  filter(!sigla_uf %in% c("PA", "MT", "RO", "AC")) %>%
  group_by(near_pa_FID, year)

p <- p %>% 
  mutate(
    year = case_when(
      year %in% 2004:2007 ~ 2004,
      year %in% 2008:2011 ~ 2008,
      year %in% 2012:2018 ~ 2012,
      year %in% 2019:2022 ~ 2019,
      TRUE ~ year
    )
  )

p <- p %>% 
  mutate(
    year3 = case_when(
      year %in% 2004:2007 ~ 1,
      year %in% 2008:2011 ~ 2,
      year %in% 2012:2018 ~ 3,
      year %in% 2019:2022 ~ 4,
      TRUE ~ NA_real_
    )
  )

# Initialize list to store RDD models
mod <- list()
# Loop over years
for(i in 1:4){
  p_i <- subset(p, year3 == i)
  
  # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
  
  # Exclude other protected areas (SP and SU) from analysis
  p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_su == 1, NA, p_i$deforestation_pct)
  
  # Outcome and running variable
  Y <- p_i$deforestation_pct
  X <- p_i$near_sp
  
  # Run local linear RDD with triangular kernel and nearest-neighbor clustered SE
  mod[[i]] <- rdrobust(
    Y, X, c = 0, kernel = "triangular", p = 1,
    bwselect = "mserd", vce = "hc3", cluster = p_i$near_pa_FID
  )
}

# Initialize an empty list to store results from each year
dat_rdd <- list()

# Loop over 19 years (assumes mod list contains results for 19 years)
for(i in 1:4){
  # Extract coefficients and standard errors from the rdrobust model for year i
  coef_i <- mod[[i]]$coef
  se_i <- mod[[i]]$se
  
  # Create a data frame for this year with coefficients, standard errors, and year index
  dat_i <- data.frame(coef_i, se_i, i)
  
  # Assign type labels (Conventional, Bias-corrected, Robust) for each coefficient
  dat_i$type <- c("Conventional", "Bias", "Robust")
  
  # Store this year's data in the list
  dat_rdd[[i]] <- dat_i
}

# Combine all yearly data frames into a single data table
data_rdd <- rbindlist(dat_rdd, use.names = TRUE)

# Keep only the robust estimates for plotting
data_rdd <- subset(data_rdd, type == "Robust")

# Convert to a data frame for ggplot
plot <- data.frame(data_rdd, stringsAsFactors = FALSE)

# Rename columns for clarity
names(plot) <- c("Estimate", "S.E.", "Year3", "type")

# Convert Estimate and S.E. to numeric
plot$Estimate <- as.numeric(as.character(plot$Estimate))
plot$S.E. <- as.numeric(as.character(plot$S.E.))

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Store the original year index and adjust year to actual calendar year
plot$val <- plot$Year
plot$Year <- plot$Year + 2003

# Ensure year is treated as a factor for plotting
plot$Year <- factor(plot$Year, levels = plot$Year[order(plot$val)])

# Additional column for plotting aesthetics (not used in this snippet)
plot$main <- c(rep("D", 4))

# Define a custom theme for plots
mytheme <- theme(
  axis.line = element_line(size = 1, colour = "black"),
  panel.background = element_rect(fill = "white")
)

# Suppose your plot data has one row per period (year3)
plot$PeriodGroupLabel <- factor(
  plot$Year3,          # the period index: 1,2,3,4
  levels = 1:4,
  labels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")
)

# Ensure PeriodGroup is a factor in the desired order
plot$PeriodGroup <- factor(plot$PeriodGroup,
                           levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022"))

# Data frame for all four periods for highlighting
highlight_periods <- data.frame(
  PeriodGroup = factor(c("2004–2007", "2008–2011", "2012–2018", "2019–2022"),
                       levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")),
  ymin = -2,
  ymax = 0.5,
  fill_color = c("white", "white", "lightgray", "lightgray"),  # alternating colors
  text = c("PPCDAM I", "PPCDAM II", "Forest Code", "Bolsonaro Adm")  # text inside tiles
)

# Plot
plot1_sp_non_arc <- ggplot(plot) +
  
  # Highlight periods with alternating colors
  geom_tile(data = highlight_periods,
            aes(x = PeriodGroup, y = (ymin + ymax)/2, height = ymax - ymin, width = 0.9, fill = fill_color),
            alpha = 0.4, inherit.aes = FALSE) +
  
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, color = "black", size = 1) +
  
  # 90% CI (thicker)
  geom_segment(aes(x = PeriodGroup, y = lower1, xend = PeriodGroup, yend = upper1, color = PeriodGroup),
               size = 2) +   # thicker
  
  # 95% CI
  geom_segment(aes(x = PeriodGroup, y = lower, xend = PeriodGroup, yend = upper, color = PeriodGroup),
               size = 1.2) +   # thicker
  
  # Points (thicker)
  geom_point(aes(x = PeriodGroup, y = Estimate, color = PeriodGroup), size = 5) +  # bigger point
  
  # Text inside tiles
  # geom_text(data = highlight_periods,
  # aes(x = PeriodGroup, y = 0.4, label = text),
  # color = "black", size = 5, inherit.aes = FALSE)
  
  # Custom colors for points/segments
  scale_color_manual(
    values = c(
      "2004–2007" = "#4C78A8",
      "2008–2011" = "#4C78A8",   # brownish
      "2012–2018"   = "#F27A7A",   # blue
      "2019–2022"       = "#990000"    # purple
    ),
    name = "Period"
  ) +
  
  # Use fill_color for tiles but do not show legend
  scale_fill_identity() +
  
  # Theme: white background
  theme_minimal(base_size = 18) +
  theme(
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 18),
    axis.text.y = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "none"
  ) +
  
  # Labels
  ylab("RDD Estimate") +
  
  # Y-axis limits and breaks
  scale_y_continuous(limits = c(-2, 0.6), breaks = seq(-2.5, 0.5, 0.5))

# Display the plot
plot1_sp_non_arc

## Display the coefficients in a tabular format - Table S7 - Panel B
# Compute p-values
plot$p_value <- 2 * (1 - pnorm(abs(plot$Estimate / plot$S.E.)))

# Add conventional significance stars
plot$significance <- cut(
  plot$p_value,
  breaks = c(-Inf, 0.001, 0.05, 0.1, Inf),
  labels = c("***", "**", "*", ""),
  right = FALSE
)

plot$Estimate_display <- paste0(sprintf("%.3f", plot$Estimate), plot$significance)

table_S7 <- plot[, c("Year3", "Estimate_display", "S.E.")]
colnames(table_S7) <- c("Year3", "Estimate", "Std. Error")

# Display
table_S7


#------------------------------------------------------------------------------
# Comparing dynamic effects: Inside vs. Outside SP ---- Figure S10 - Middle ----
#-------------------------------------------------------------------------------
p <- df_pas %>%
  # Keep only the years 2004–2011
  filter(year >= 2004 & year <= 2011) %>%
  # For Arc of Deforestation, also use: # filter(sigla_uf %in% c("PA", "MT", "RO", "AC"))
  # For Non-Arc of Deforestation, also use: filter(!sigla_uf %in% c("PA", "MT", "RO", "AC"))
  
  # Group by unique spatial unit (near_pa_FID) and year
  group_by(near_pa_FID, year) %>%
  
  #Keep only grids in SP homologated by the current year and remove grids with no homologation year (NA) 
  subset(!is.na(near_pa_ano) & year >= near_pa_ano) %>%
  
  # Keep only observations within -12km to +12km from SP boundary
  filter(near_sp >= -12000 & near_sp <= 12000) %>%
  
  # Set deforestation_pct to NA for other protected areas (IT and SU)
  mutate(deforestation_pct = ifelse(grid_is_it == 1 & grid_is_su == 1, NA, deforestation_pct))

# Preparing binned data for plotting
p_binned <- p %>%
  # Creating 500-meter bins along 'near_it' distance
  mutate(bin = cut(near_sp, breaks = seq(-12000, 12000, by = 500), include.lowest = TRUE)) %>%
  
  # Aggregating by bin, year, and whether grid is inside SP
  group_by(bin, year, grid_is_sp) %>%
  summarise(
    near_sp = mean(near_sp, na.rm = TRUE),          # Average distance per bin
    deforestation_pct = mean(deforestation_pct, na.rm = TRUE),  # Average deforestation per bin
    .groups = "drop"
  ) %>%
  
  # Computing midpoint of each bin for plotting
  mutate(
    bin_midpoint = as.numeric(sub("\\[\\s*(-?\\d+),.*", "\\1", as.character(bin))) + 250
  )

# Filtering out NAs to prevent warnings in ggplot
p_binned_plot <- p_binned %>%
  filter(!is.na(deforestation_pct) & !is.na(bin_midpoint))

# Creating the plot
av_def_sp_pre <- ggplot() + 

  # Local polynomial regression (inside SP) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_sp == 1), 
    aes(x = near_sp, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Local polynomial regression (outside SP) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_sp == 0), 
    aes(x = near_sp, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Vertical line at SP boundary (x = 0)
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Horizontal line at 0 deforestation
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Color palette for years
  scale_color_manual(values = c(  
    "2004" = "#D0E1F2",  
    "2005" = "#D0E1F2",  
    "2006" = "#79A8D1",  
    "2007" = "#79A8D1",  
    "2008" = "#79A8D1",  
    "2009" = "#082A69",  
    "2010" = "#082A69",  
    "2011" = "#082A69"  
  )) +  
  
  # Labels
  labs(title = "",
       x = "Distance (Km)",
       y = "",
       color = "Year") +  
  
  # Theme customization
  theme_minimal(base_size = 18) +  
  theme(
    legend.position = "none",
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 14),
    axis.title = element_text(size = 22, face = "bold"),
    axis.text = element_text(size = 25),
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, face = "italic", hjust = 0.5),
    panel.grid.major = element_line(color = "white", size = 0.5),
    panel.grid.minor = element_line(color = "white", size = 0.3),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.line = element_line(color = "black", size = 1)
  ) +  
  
  # Zoom in safely without dropping points
  coord_cartesian(
    xlim = c(-10000, 12000),  # in meters
    ylim = c(-0.1, 1.8)
  ) +  
  
  # Adjust axis scales
  scale_x_continuous(
    breaks = seq(-10000, 12000, by = 5000),  # every 5 km
    labels = scales::number_format(scale = 0.001),  # convert to km
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 2.5, by = 0.5),
    expand = c(0, 0.1)
  )

av_def_sp_pre


###### Filtering and preparing raw data - Post-Forest Code - 2012-2022
p <- df_pas %>%
  # Keep only the years 2004–2011
  filter(year >= 2012 & year <= 2022) %>%
  # For Arc of Deforestation, also use: # filter(sigla_uf %in% c("PA", "MT", "RO", "AC"))
  # For Non-Arc of Deforestation, also use: filter(!sigla_uf %in% c("PA", "MT", "RO", "AC"))
  
  # Group by unique spatial unit (near_pa_FID) and year
  group_by(near_pa_FID, year) %>%
  
  #Keep only grids in SPs homologated by the current year and remove grids with no homologation year (NA) 
  subset(!is.na(near_pa_ano) & year >= near_pa_ano) %>%
  
  # Keep only observations within -12km to +12km from Sp boundary
  filter(near_sp >= -12000 & near_sp <= 12000) %>%
  
  # Set deforestation_pct to NA for other protected areas (IT and SU)
  mutate(deforestation_pct = ifelse(grid_is_it == 1 & grid_is_su == 1, NA, deforestation_pct))

# Preparing binned data for plotting
p_binned <- p %>% 
  # Creating 500-meter bins along 'near_it' distance
  mutate(bin = cut(near_sp, breaks = seq(-12000, 12000, by = 500), include.lowest = TRUE)) %>%
  
  # Aggregating by bin, year, and whether grid is inside SP
  group_by(bin, year, grid_is_sp) %>%
  summarise(
    near_sp = mean(near_sp, na.rm = TRUE),  # Average distance per bin
    deforestation_pct = mean(deforestation_pct, na.rm = TRUE), # Average deforestation per bin
    .groups = "drop"
  ) %>%
  # Computing midpoint of each bin for plotting
  mutate(
    bin_midpoint = as.numeric(sub("\\[\\s*(-?\\d+),.*", "\\1", as.character(bin))) + 250)

# Filtering out NAs to prevent warnings in ggplot
p_binned_plot <- p_binned %>%
  filter(!is.na(deforestation_pct) & !is.na(bin_midpoint))

# Creating the plot
av_def_sp_post <- ggplot() + 
  
  # Local polynomial regression (inside SP) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_sp == 1), 
    aes(x = near_sp, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Local polynomial regression (outside SP) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_sp == 0), 
    aes(x = near_sp, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Vertical line at SP boundary (x = 0)
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Horizontal line at 0 deforestation
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Color palette for years
  scale_color_manual(values = c(  
    "2012" = "#FAD2D2",
    "2013" = "#FAD2D2",
    "2014" = "#FAD2D2",
    "2015" = "#F27A7A",
    "2016" = "#F27A7A",
    "2017" = "#F27A7A",
    "2018" = "#F26565",
    "2019" = "#B30000",
    "2020" = "#B30000",
    "2021" = "#800000",
    "2022" = "#800000" 
  )) +  
  
  # Labels
  labs(title = "",
       x = "Distance (Km)",
       y = "",
       color = "Year") +  
  
  # Theme customization
  theme_minimal(base_size = 18) +  
  theme(
    legend.position = "none",
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 14),
    axis.title = element_text(size = 22, face = "bold"),
    axis.text = element_text(size = 25),
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, face = "italic", hjust = 0.5),
    panel.grid.major = element_line(color = "white", size = 0.5),
    panel.grid.minor = element_line(color = "white", size = 0.3),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.line = element_line(color = "black", size = 1)
  ) +  
  
  # Zoom in safely without dropping points
  coord_cartesian(
    xlim = c(-10000, 12000),  # in meters
    ylim = c(-0.1, 1.8)
  ) +  
  
  # Adjust axis scales
  scale_x_continuous(
    breaks = seq(-10000, 12000, by = 5000),  # every 5 km
    labels = scales::number_format(scale = 0.001),  # convert to km
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 2.5, by = 0.5),
    expand = c(0, 0.1)
  )

av_def_sp_post

#------------------------------------------------------------------------------#
### Robustness Check 1 - Different Bandwidths ---- Figure S12 -----------
#-------------------------------------------------------------------------------
bandwidths <- c(3000, 6000, 9000, 12000, 15000, 18000, 20000)

# Prepare your data
p <- df_pas %>%
  filter(near_sp >= -20000 & near_sp <= 20000) %>%
  group_by(near_pa_FID, year)

# Initialize list to store RDD models
mod_list_all <- list()

# Loop over years
for (bw in bandwidths) {
  mod_list <- list()
  
  for (i in 2004:2022) {
    p_i <- subset(p, year == i)
    
    # Remove grids that are inside any SP that was homologated after the current year (future SPs)
    p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
    
    # Exclude other protected areas from analysis (IT and SU)
    p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_su == 1, NA, p_i$deforestation_pct)
    
    # Outcome and running variable
    Y <- p_i$deforestation_pct
    X <- p_i$near_sp
    
    valid <- !is.na(Y) & !is.na(X)
    Y <- Y[valid]
    X <- X[valid]
    cluster <- p_i$near_pa_FID[valid]
    
    # Only run if both sides of cutoff have variation
    if (length(unique(X[X < 0])) > 1 & length(unique(X[X > 0])) > 1) {
      mod_list[[as.character(i)]] <- rdrobust(
        Y, X, c = 0, kernel = "triangular", p = 1,
        h = bw, vce = "nn", cluster = cluster
      )
    } else {
      mod_list[[as.character(i)]] <- NULL
    }
  }
  mod_list_all[[as.character(bw)]] <- mod_list
}

# Initialize an empty list to store results from each year
dat_rdd_all <- list()

for (bw in names(mod_list_all)) {
  mod_list <- mod_list_all[[bw]]
  dat_rdd <- list()
  
  for (year in names(mod_list)) {
    mod <- mod_list[[year]]
    if (!is.null(mod)) {
      coef_i <- mod$coef
      se_i <- mod$se
      dat_i <- data.frame(
        Estimate = coef_i[3],  # Robust estimate
        S.E. = se_i[3],        # Robust standard error
        Year = as.numeric(year),
        Bandwidth = as.numeric(bw)
      )
      dat_rdd[[year]] <- dat_i
    }
  }
  dat_rdd_all[[bw]] <- do.call(rbind, dat_rdd)
}

# Combine all into one dataframe
plot <- do.call(rbind, dat_rdd_all)

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Make Year a factor if needed for plotting
plot$Year <- factor(plot$Year, levels = sort(unique(plot$Year)))

plot_bw_sp <- ggplot(plot, aes(x = Year, y = Estimate, color = as.factor(Bandwidth))) + 
  geom_hline(yintercept = 0, color = "black", size = 1) + 
  geom_vline(xintercept = c("2012", "2019"), color = "black", linetype = "dashed", size = 1) + 
  geom_point(size = 2, position = position_dodge(width = 0.5)) + 
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, size = 0.8,
                position = position_dodge(width = 0.5)) + 
  geom_errorbar(aes(ymin = lower1, ymax = upper1), width = 0.2, size = 1.5, alpha = 0.6,
                position = position_dodge(width = 0.5)) + 
  scale_y_continuous(
    breaks = seq(-2.5, 0.5, by = 0.5),
    limits = c(-2.5, 0.5)
  ) + 
  scale_x_discrete(drop = FALSE) + 
  labs(
    x = "Year",
    y = "RDD Estimate",
    title = "",
    color = "Bandwidth"
  ) + 
  theme_minimal() + 
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 18),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.position = "bottom"
  ) + 
  annotate("text", x = "2012", y = -1, label = "Forest Code", angle = 90, vjust = -0.5, size = 5) +
  annotate("text", x = "2019", y = -1, label = "Bolsonaro Elected", angle = 90, vjust = -0.5, size = 5)

print(plot_bw_sp)
  

#------------------------------------------------------------------------------#
### Robustness Check 2 - Balance Checks ---- Figure S15 ----
#-------------------------------------------------------------------------------
# Only keep grid cells within ±12 km of the IT border (homologated)
balance_data <- subset(
    df_pas,
    near_sp >= -12000 & near_sp <= 12000 &   
      !is.na(near_pa_ano) &             
      year >= near_pa_ano  
  )

# List of covariates to plot with their labels
vars_to_plot <- list(
  elevation = "Elevation",
  hillshade = "Hillshade",
  slope = "Slope",
  near_rivers = "Rivers",
  near_roads_05 = "Roads",
  near_cities_FID = "Cities"
)

# Generating plots
plot_rdd <- function(var_name, y_label) {
  # Replace values inside other PAs with NA
  balance_data[[var_name]] <- ifelse(balance_data$grid_is_it == 1 & 
                                       balance_data$grid_is_su == 1, NA, balance_data[[var_name]])
  
  # Generate rdplot
  p <- rdplot(balance_data[[var_name]], balance_data$near_sp,
              c = 0, p = 1, kernel = "tri", nbins = 300,
              x.label = "Distance to SP Border (m)",
              y.label = NULL,
              title = y_label)$rdplot
  
  # Apply a larger, publication-quality theme
  p + theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_blank(),
    axis.text.x  = element_text(size = 12),
    axis.text.y  = element_text(size = 12),
    plot.title   = element_text(size = 12, face = "bold", hjust = 0.5),
    plot.margin  = margin(t = 10, r = 15, b = 10, l = 15)
  )
}

# Loop through variables and generate plots
plots <- lapply(names(vars_to_plot), function(v) plot_rdd(v, vars_to_plot[[v]]))

#------------------------------------------------------------------------------#
### Robustness Check 3 - Placebo Threshold Tests ---- Figure S17 - Middle ----
#-------------------------------------------------------------------------------
# Preparing the data
p <- df_pas %>%
  filter(near_sp >= -12000 & near_sp <= 12000) %>%
  group_by(near_pa_FID, year)

# Placebo offsets in meters
placebo_offsets <- c(-5000, -2000, 2000, 5000)

dat_rdd_all <- list()
mod_placebo <- list()

for(offset in placebo_offsets){
  mod_list <- list()
  
  for(i in 2004:2022){
    p_i <- subset(p, year == i)
    
    p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
    
    # Exclude other protected areas from analysis (IT and SU)
    p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_su == 1, NA, p_i$deforestation_pct)
    
    Y <- p_i$deforestation_pct
    X <- p_i$near_sp
    
    mod_list[[i - 2003]] <- rdrobust(
      Y, X, c = offset, kernel = "triangular", p = 1,
      bwselect = "mserd", vce = "nn", cluster = p_i$near_pa_FID
    )
  }
  
  # Collect results
  dat_rdd <- list()
  for(j in 1:length(mod_list)){
    coef_i <- mod_list[[j]]$coef[1]
    se_i   <- mod_list[[j]]$se[1]
    dat_rdd[[j]] <- data.frame(
      Estimate = coef_i,
      S.E = se_i,
      Year = 2003 + j,
      Bandwidth = paste0(ifelse(offset>0,"+", ""), offset/1000, "km")
      
    )
  }
  
  dat_rdd_all[[paste0(offset)]] <- rbindlist(dat_rdd)
}

plot <- do.call(rbind, dat_rdd_all)

# Compute confidence intervals
plot$lower  <- plot$Estimate - 1.96 * plot$S.E
plot$upper  <- plot$Estimate + 1.96 * plot$S.E
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E

# Make Year a factor
plot$Year <- factor(plot$Year, levels = sort(unique(plot$Year)))

# Plot
plot_bw_placebo_sp <- ggplot(plot, aes(x = Year, y = Estimate, color = Bandwidth)) +
  geom_hline(yintercept = 0, color = "black", size = 1) +
  geom_vline(xintercept = c("2012", "2019"), color = "black", linetype = "dashed", size = 1) +
  geom_point(size = 2, position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, size = 0.8,
                position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = lower1, ymax = upper1), width = 0.2, size = 1.5, alpha = 0.6,
                position = position_dodge(width = 0.5)) +
  scale_y_continuous(
    breaks = seq(-2.5, 1.5, by = 0.5),
    limits = c(-2.5, 1.5)
  ) +
  scale_x_discrete(drop = FALSE) +
  labs(
    x = "Year",
    y = "RDD Estimate",
    color = "Threshold Distance"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 18),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.position = "bottom"
  ) +
  annotate("text", x = "2012", y = -1.5, label = "Forest Code", angle = 90, vjust = -0.5, size = 4) +
  annotate("text", x = "2019", y = -1.5, label = "Bolsonaro Elected", angle = 90, vjust = -0.5, size = 4)

print(plot_bw_placebo_sp)

#-----------------------------------------------------------------------------#
### Estimation SU - Geographic RDDs - Full Sample ----- Figure 3, Table S7 - Bottom
#-----------------------------------------------------------------------------
# Create 'near_su' containing 'near_pa' values only for SU areas; others set to NA
df_pas$near_su <- ifelse(df_pas$near_pa_type == "Uso Sustentável", df_pas$near_pa, 
                           as.numeric(NA))

# Adjust distance variable: negative outside IT, positive inside
df_pas$near_su <- ifelse(df_pas$grid_is_su == 0, df_pas$near_su*(-1), df_pas$near_su)

## SUs measured every period year using a 12k bandwidth
p <- df_pas %>%
  filter(near_su >= -12000 & near_su <= 12000) %>%
  group_by(near_pa_FID, year)

p <- p %>% 
  mutate(
    year = case_when(
      year %in% 2004:2007 ~ 2004,
      year %in% 2008:2011 ~ 2008,
      year %in% 2012:2018 ~ 2012,
      year %in% 2019:2022 ~ 2019,
      TRUE ~ year
    )
  )

p <- p %>% 
  mutate(
    year3 = case_when(
      year %in% 2004:2007 ~ 1,
      year %in% 2008:2011 ~ 2,
      year %in% 2012:2018 ~ 3,
      year %in% 2019:2022 ~ 4,
      TRUE ~ NA_real_
    )
  )

# Initialize list to store RDD models
mod <- list()
# Loop over years
for(i in 1:4){
  p_i <- subset(p, year3 == i)
  
  # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
  
  # Exclude other protected areas (SP and SU) from analysis
  p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_sp == 1, NA, p_i$deforestation_pct)
  
  # Outcome and running variable
  Y <- p_i$deforestation_pct
  X <- p_i$near_su
  
  
  # Run local linear RDD with triangular kernel and nearest-neighbor clustered SE
  mod[[i]] <- rdrobust(
    Y, X, c = 0, kernel = "triangular", p = 1,
    bwselect = "mserd", vce = "hc3", cluster = p_i$near_pa_FID
  )
}

# Initialize an empty list to store results from each year
dat_rdd <- list()

# Loop over 19 years (assumes mod list contains results for 19 years)
for(i in 1:4){
  # Extract coefficients and standard errors from the rdrobust model for year i
  coef_i <- mod[[i]]$coef
  se_i <- mod[[i]]$se
  
  # Create a data frame for this year with coefficients, standard errors, and year index
  dat_i <- data.frame(coef_i, se_i, i)
  
  # Assign type labels (Conventional, Bias-corrected, Robust) for each coefficient
  dat_i$type <- c("Conventional", "Bias", "Robust")
  
  # Store this year's data in the list
  dat_rdd[[i]] <- dat_i
}

# Combine all yearly data frames into a single data table
data_rdd <- rbindlist(dat_rdd, use.names = TRUE)

# Keep only the robust estimates for plotting
data_rdd <- subset(data_rdd, type == "Robust")

# Convert to a data frame for ggplot
plot <- data.frame(data_rdd, stringsAsFactors = FALSE)

# Rename columns for clarity
names(plot) <- c("Estimate", "S.E.", "Year3", "type")

# Convert Estimate and S.E. to numeric
plot$Estimate <- as.numeric(as.character(plot$Estimate))
plot$S.E. <- as.numeric(as.character(plot$S.E.))

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Store the original year index and adjust year to actual calendar year
plot$val <- plot$Year
plot$Year <- plot$Year + 2003

# Ensure year is treated as a factor for plotting
plot$Year <- factor(plot$Year, levels = plot$Year[order(plot$val)])

# Additional column for plotting aesthetics (not used in this snippet)
plot$main <- c(rep("D", 4))

# Define a custom theme for plots
mytheme <- theme(
  axis.line = element_line(size = 1, colour = "black"),
  panel.background = element_rect(fill = "white")
)

# Suppose your plot data has one row per period (year3)
plot$PeriodGroupLabel <- factor(
  plot$Year3,          # the period index: 1,2,3,4
  levels = 1:4,
  labels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")
)

# Ensure PeriodGroup is a factor in the desired order
plot$PeriodGroup <- factor(plot$PeriodGroup,
                           levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022"))

# Data frame for all four periods for highlighting
highlight_periods <- data.frame(
  PeriodGroup = factor(c("2004–2007", "2008–2011", "2012–2018", "2019–2022"),
                       levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")),
  ymin = -2,
  ymax = 0.5,
  fill_color = c("white", "white", "lightgray", "lightgray"),  # alternating colors
  text = c("PPCDAM I", "PPCDAM II", "Forest Code", "Bolsonaro Adm")  # text inside tiles
)

# Plot
plot1_su <- ggplot(plot) +
  
  # Highlight periods with alternating colors
  geom_tile(data = highlight_periods,
            aes(x = PeriodGroup, y = (ymin + ymax)/2, height = ymax - ymin, width = 0.9, fill = fill_color),
            alpha = 0.4, inherit.aes = FALSE) +
  
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, color = "black", size = 1) +
  
  # 90% CI (thicker)
  geom_segment(aes(x = PeriodGroup, y = lower1, xend = PeriodGroup, yend = upper1, color = PeriodGroup),
               size = 2) +   # thicker
  
  # 95% CI
  geom_segment(aes(x = PeriodGroup, y = lower, xend = PeriodGroup, yend = upper, color = PeriodGroup),
               size = 1.2) +   # thicker
  
  # Points (thicker)
  geom_point(aes(x = PeriodGroup, y = Estimate, color = PeriodGroup), size = 5) +  # bigger point
  
  # Text inside tiles
  #geom_text(data = highlight_periods,
  #aes(x = PeriodGroup, y = 0.4, label = text),
  #color = "black", size = 5, inherit.aes = FALSE) +
  
  # Custom colors for points/segments
  scale_color_manual(
    values = c(
      "2004–2007" = "#4C78A8",
      "2008–2011" = "#4C78A8",   # brownish
      "2012–2018"   = "#F27A7A",   # blue
      "2019–2022"       = "#990000"    # purple
    ),
    name = "Period"
  ) +
  
  # Use fill_color for tiles but do not show legend
  scale_fill_identity() +
  
  # Theme: white background
  theme_minimal(base_size = 18) +
  theme(
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 18),
    axis.text.y = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "none"
  ) +
  
  # Labels
  ylab("RDD Estimate") +
  
  # Y-axis limits and breaks
  scale_y_continuous(limits = c(-2, 0.5), breaks = seq(-2.5, 0.5, 0.5))

# Display the plot
plot1_su

## Display the coefficients in a tabular format - Table S7 - Panel C
# Compute p-values
plot$p_value <- 2 * (1 - pnorm(abs(plot$Estimate / plot$S.E.)))

# Add conventional significance stars
plot$significance <- cut(
  plot$p_value,
  breaks = c(-Inf, 0.001, 0.05, 0.1, Inf),
  labels = c("***", "**", "*", ""),
  right = FALSE
)

plot$Estimate_display <- paste0(sprintf("%.3f", plot$Estimate), plot$significance)

table_S7 <- plot[, c("Year3", "Estimate_display", "S.E.")]
colnames(table_S7) <- c("Year3", "Estimate", "Std. Error")

# Display
table_S7


#--------------------------------------------------------------- --------
### Estimation SU - Geographic RDDs - Arc of Deforestation ---- Figure 3, Table S7 - Bottom
#-----------------------------------------------------------------------------
## Every period using a 12k bandwidth, restricted to states in Arc-Region
p <- df_pas %>%
  filter(near_su >= -12000 & near_su <= 12000) %>%
  filter(sigla_uf %in% c("PA", "MT", "RO", "AC")) %>%
  group_by(near_pa_FID, year)

p <- p %>% 
  mutate(
    year = case_when(
      year %in% 2004:2007 ~ 2004,
      year %in% 2008:2011 ~ 2008,
      year %in% 2012:2018 ~ 2012,
      year %in% 2019:2022 ~ 2019,
      TRUE ~ year
    )
  )

p <- p %>% 
  mutate(
    year3 = case_when(
      year %in% 2004:2007 ~ 1,
      year %in% 2008:2011 ~ 2,
      year %in% 2012:2018 ~ 3,
      year %in% 2019:2022 ~ 4,
      TRUE ~ NA_real_
    )
  )

# Initialize list to store RDD models
mod <- list()
# Loop over years
for(i in 1:4){
  p_i <- subset(p, year3 == i)
  
  # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
  
  # Exclude other protected areas (SP and SU) from analysis
  p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_sp == 1, NA, p_i$deforestation_pct)
  
  # Outcome and running variable
  Y <- p_i$deforestation_pct
  X <- p_i$near_su
  
  # Run local linear RDD with triangular kernel and nearest-neighbor clustered SE
  mod[[i]] <- rdrobust(
    Y, X, c = 0, kernel = "triangular", p = 1,
    bwselect = "mserd", vce = "hc3", cluster = p_i$near_pa_FID
  )
}

# Initialize an empty list to store results from each year
dat_rdd <- list()

# Loop over 19 years (assumes mod list contains results for 19 years)
for(i in 1:4){
  # Extract coefficients and standard errors from the rdrobust model for year i
  coef_i <- mod[[i]]$coef
  se_i <- mod[[i]]$se
  
  # Create a data frame for this year with coefficients, standard errors, and year index
  dat_i <- data.frame(coef_i, se_i, i)
  
  # Assign type labels (Conventional, Bias-corrected, Robust) for each coefficient
  dat_i$type <- c("Conventional", "Bias", "Robust")
  
  # Store this year's data in the list
  dat_rdd[[i]] <- dat_i
}

# Combine all yearly data frames into a single data table
data_rdd <- rbindlist(dat_rdd, use.names = TRUE)

# Keep only the robust estimates for plotting
data_rdd <- subset(data_rdd, type == "Robust")

# Convert to a data frame for ggplot
plot <- data.frame(data_rdd, stringsAsFactors = FALSE)

# Rename columns for clarity
names(plot) <- c("Estimate", "S.E.", "Year3", "type")

# Convert Estimate and S.E. to numeric
plot$Estimate <- as.numeric(as.character(plot$Estimate))
plot$S.E. <- as.numeric(as.character(plot$S.E.))

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Store the original year index and adjust year to actual calendar year
plot$val <- plot$Year
plot$Year <- plot$Year + 2003

# Ensure year is treated as a factor for plotting
plot$Year <- factor(plot$Year, levels = plot$Year[order(plot$val)])

# Additional column for plotting aesthetics (not used in this snippet)
plot$main <- c(rep("D", 4))

# Define a custom theme for plots
mytheme <- theme(
  axis.line = element_line(size = 1, colour = "black"),
  panel.background = element_rect(fill = "white")
)

# Suppose your plot data has one row per period (year3)
plot$PeriodGroupLabel <- factor(
  plot$Year3,          # the period index: 1,2,3,4
  levels = 1:4,
  labels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")
)

# Ensure PeriodGroup is a factor in the desired order
plot$PeriodGroup <- factor(plot$PeriodGroup,
                           levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022"))

# Data frame for all four periods for highlighting
highlight_periods <- data.frame(
  PeriodGroup = factor(c("2004–2007", "2008–2011", "2012–2018", "2019–2022"),
                       levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")),
  ymin = -2,
  ymax = 0.5,
  fill_color = c("white", "white", "lightgray", "lightgray"),  # alternating colors
  text = c("PPCDAM I", "PPCDAM II", "Forest Code", "Bolsonaro Adm")  # text inside tiles
)

# Plot
plot1_su_arc <- ggplot(plot) +
  
  # Highlight periods with alternating colors
  geom_tile(data = highlight_periods,
            aes(x = PeriodGroup, y = (ymin + ymax)/2, height = ymax - ymin, width = 0.9, fill = fill_color),
            alpha = 0.4, inherit.aes = FALSE) +
  
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, color = "black", size = 1) +
  
  # 90% CI (thicker)
  geom_segment(aes(x = PeriodGroup, y = lower1, xend = PeriodGroup, yend = upper1, color = PeriodGroup),
               size = 2) +   # thicker
  
  # 95% CI
  geom_segment(aes(x = PeriodGroup, y = lower, xend = PeriodGroup, yend = upper, color = PeriodGroup),
               size = 1.2) +   # thicker
  
  # Points (thicker)
  geom_point(aes(x = PeriodGroup, y = Estimate, color = PeriodGroup), size = 5) +  # bigger point
  
  # Text inside tiles
  # geom_text(data = highlight_periods,
  # aes(x = PeriodGroup, y = 0.4, label = text),
  # color = "black", size = 5, inherit.aes = FALSE) +
  
  # Custom colors for points/segments
  scale_color_manual(
    values = c(
      "2004–2007" = "#4C78A8",
      "2008–2011" = "#4C78A8",   # brownish
      "2012–2018"   = "#F27A7A",   # blue
      "2019–2022"       = "#990000"    # purple
    ),
    name = "Period"
  ) +
  
  # Use fill_color for tiles but do not show legend
  scale_fill_identity() +
  
  # Theme: white background
  theme_minimal(base_size = 18) +
  theme(
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 18),
    axis.text.y = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "none"
  ) +
  
  # Labels
  ylab("RDD Estimate") +
  
  # Y-axis limits and breaks
  scale_y_continuous(limits = c(-2, 0.5), breaks = seq(-2.5, 0.5, 0.5))

# Display the plot
plot1_su_arc

## Display the coefficients in a tabular format - Table S7 - Panel C
# Compute p-values
plot$p_value <- 2 * (1 - pnorm(abs(plot$Estimate / plot$S.E.)))

# Add conventional significance stars
plot$significance <- cut(
  plot$p_value,
  breaks = c(-Inf, 0.001, 0.05, 0.1, Inf),
  labels = c("***", "**", "*", ""),
  right = FALSE
)

plot$Estimate_display <- paste0(sprintf("%.3f", plot$Estimate), plot$significance)

table_S7 <- plot[, c("Year3", "Estimate_display", "S.E.")]
colnames(table_S7) <- c("Year3", "Estimate", "Std. Error")

# Display
table_S7


#--------------------------------------------------------------- --------
### Estimation SU - Geographic RDDs - Non-Arc of Deforestation ---- #
#-----------------------------------------------------------------------------
## Every period using a 12k bandwidth, restricted to states in Non-Arc Region
p <- df_pas %>%
  filter(near_su >= -12000 & near_su <= 12000) %>%
  filter(!sigla_uf %in% c("PA", "MT", "RO", "AC")) %>%
  group_by(near_pa_FID, year)

p <- p %>% 
  mutate(
    year = case_when(
      year %in% 2004:2007 ~ 2004,
      year %in% 2008:2011 ~ 2008,
      year %in% 2012:2018 ~ 2012,
      year %in% 2019:2022 ~ 2019,
      TRUE ~ year
    )
  )

p <- p %>% 
  mutate(
    year3 = case_when(
      year %in% 2004:2007 ~ 1,
      year %in% 2008:2011 ~ 2,
      year %in% 2012:2018 ~ 3,
      year %in% 2019:2022 ~ 4,
      TRUE ~ NA_real_
    )
  )

# Initialize list to store RDD models
mod <- list()
# Loop over years
for(i in 1:4){
  p_i <- subset(p, year3 == i)
  
  # Keep only grids in ITs homologated by the current year and remove grids with no homologation year (NA) 
  p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
  
  # Exclude other protected areas (SP and SU) from analysis
  p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_sp == 1, NA, p_i$deforestation_pct)
  
  # Outcome and running variable
  Y <- p_i$deforestation_pct
  X <- p_i$near_su
  
  # Run local linear RDD with triangular kernel and nearest-neighbor clustered SE
  mod[[i]] <- rdrobust(
    Y, X, c = 0, kernel = "triangular", p = 1,
    bwselect = "mserd", vce = "hc3", cluster = p_i$near_pa_FID
  )
}

# Initialize an empty list to store results from each year
dat_rdd <- list()

# Loop over 19 years (assumes mod list contains results for 19 years)
for(i in 1:4){
  # Extract coefficients and standard errors from the rdrobust model for year i
  coef_i <- mod[[i]]$coef
  se_i <- mod[[i]]$se
  
  # Create a data frame for this year with coefficients, standard errors, and year index
  dat_i <- data.frame(coef_i, se_i, i)
  
  # Assign type labels (Conventional, Bias-corrected, Robust) for each coefficient
  dat_i$type <- c("Conventional", "Bias", "Robust")
  
  # Store this year's data in the list
  dat_rdd[[i]] <- dat_i
}

# Combine all yearly data frames into a single data table
data_rdd <- rbindlist(dat_rdd, use.names = TRUE)

# Keep only the robust estimates for plotting
data_rdd <- subset(data_rdd, type == "Robust")

# Convert to a data frame for ggplot
plot <- data.frame(data_rdd, stringsAsFactors = FALSE)

# Rename columns for clarity
names(plot) <- c("Estimate", "S.E.", "Year3", "type")

# Convert Estimate and S.E. to numeric
plot$Estimate <- as.numeric(as.character(plot$Estimate))
plot$S.E. <- as.numeric(as.character(plot$S.E.))

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Store the original year index and adjust year to actual calendar year
plot$val <- plot$Year
plot$Year <- plot$Year + 2003

# Ensure year is treated as a factor for plotting
plot$Year <- factor(plot$Year, levels = plot$Year[order(plot$val)])

# Additional column for plotting aesthetics (not used in this snippet)
plot$main <- c(rep("D", 4))

# Define a custom theme for plots
mytheme <- theme(
  axis.line = element_line(size = 1, colour = "black"),
  panel.background = element_rect(fill = "white")
)

# Suppose your plot data has one row per period (year3)
plot$PeriodGroupLabel <- factor(
  plot$Year3,          # the period index: 1,2,3,4
  levels = 1:4,
  labels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")
)

# Ensure PeriodGroup is a factor in the desired order
plot$PeriodGroup <- factor(plot$PeriodGroup,
                           levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022"))

# Data frame for all four periods for highlighting
highlight_periods <- data.frame(
  PeriodGroup = factor(c("2004–2007", "2008–2011", "2012–2018", "2019–2022"),
                       levels = c("2004–2007", "2008–2011", "2012–2018", "2019–2022")),
  ymin = -2,
  ymax = 0.5,
  fill_color = c("white", "white", "lightgray", "lightgray"),  # alternating colors
  text = c("PPCDAM I", "PPCDAM II", "Forest Code", "Bolsonaro Adm")  # text inside tiles
)

plot$point_size <- ifelse(plot$PeriodGroup == "2004–2007", 7, 5)

# Plot
plot1_su_non_arc <- ggplot(plot) +
  
  # Highlight periods with alternating colors
  geom_tile(data = highlight_periods,
            aes(x = PeriodGroup, y = (ymin + ymax)/2, height = ymax - ymin, width = 0.9, fill = fill_color),
            alpha = 0.4, inherit.aes = FALSE) +
  
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, color = "black", size = 1) +
  
  # 90% CI (thicker)
  geom_segment(aes(x = PeriodGroup, y = lower1, xend = PeriodGroup, yend = upper1, color = PeriodGroup),
               size = 2) +   # thicker
  
  # 95% CI
  geom_segment(aes(x = PeriodGroup, y = lower, xend = PeriodGroup, yend = upper, color = PeriodGroup),
               size = 1.2) +   # thicker
  
  # Points (thicker)
  geom_point(aes(x = PeriodGroup, y = Estimate, color = PeriodGroup), size = 5) +  # bigger point
  
  # Text inside tiles
  #geom_text(data = highlight_periods,
  # aes(x = PeriodGroup, y = 0.4, label = text),
  # color = "black", size = 5, inherit.aes = FALSE) +
  
  # Custom colors for points/segments
  scale_color_manual(
    values = c(
      "2004–2007" = "#4C78A8",
      "2008–2011" = "#4C78A8",   # brownish
      "2012–2018"   = "#F27A7A",   # blue
      "2019–2022"       = "#990000"    # purple
    ),
    name = "Period"
  ) +
  
  # Use fill_color for tiles but do not show legend
  scale_fill_identity() +
  
  # Theme: white background
  theme_minimal(base_size = 18) +
  theme(
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 18),
    axis.text.y = element_text(size = 18),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "none"
  ) +
  
  # Labels
  ylab("RDD Estimate") +
  
  # Y-axis limits and breaks
  scale_y_continuous(limits = c(-2, 0.6), breaks = seq(-2.5, 0.5, 0.5))

# Display the plot
plot1_su_non_arc

## Display the coefficients in a tabular format - Table S7 - Panel C
# Compute p-values
plot$p_value <- 2 * (1 - pnorm(abs(plot$Estimate / plot$S.E.)))

# Add conventional significance stars
plot$significance <- cut(
  plot$p_value,
  breaks = c(-Inf, 0.001, 0.05, 0.1, Inf),
  labels = c("***", "**", "*", ""),
  right = FALSE
)

plot$Estimate_display <- paste0(sprintf("%.3f", plot$Estimate), plot$significance)

table_S7 <- plot[, c("Year3", "Estimate_display", "S.E.")]
colnames(table_S7) <- c("Year3", "Estimate", "Std. Error")

# Display
table_S7

#------------------------------------------------------------------------------#
# Comparing dynamic effects: Inside vs. Outside SU ---- Figure S10 - Bottom ----
#-------------------------------------------------------------------------------
p <- df_pas %>%
  
  filter(year >= 2004 & year <= 2011) %>%
  # For Arc of Deforestation, also use: # filter(sigla_uf %in% c("PA", "MT", "RO", "AC"))
  # For Non-Arc of Deforestation, also use: filter(!sigla_uf %in% c("PA", "MT", "RO", "AC"))
  
  # Group by unique spatial unit (near_pa_FID) and year
  group_by(near_pa_FID, year) %>%
  
  #Keep only grids in SU homologated by the current year and remove grids with no homologation year (NA) 
  subset(!is.na(near_pa_ano) & year >= near_pa_ano) %>%
  
  # Keep only observations within -12km to +12km from SU boundary
  filter(near_su >= -12000 & near_su <= 12000) %>%
  
  # Set deforestation_pct to NA for other protected areas (IT and SP)
  mutate(deforestation_pct = ifelse(grid_is_it == 1 & grid_is_sp == 1, NA, deforestation_pct))

# Preparing binned data for plotting
p_binned <- p %>%
  # Creating 500-meter bins along 'near_su' distance
  mutate(bin = cut(near_su, breaks = seq(-12000, 12000, by = 500), include.lowest = TRUE)) %>%
  
  # Aggregating by bin, year, and whether grid is inside SU
  group_by(bin, year, grid_is_su) %>%
  summarise(
    near_su = mean(near_su, na.rm = TRUE),          # Average distance per bin
    deforestation_pct = mean(deforestation_pct, na.rm = TRUE),  # Average deforestation per bin
    .groups = "drop"
  ) %>%
  
  # Computing midpoint of each bin for plotting
  mutate(
    bin_midpoint = as.numeric(sub("\\[\\s*(-?\\d+),.*", "\\1", as.character(bin))) + 250
  )

# Filtering out NAs to prevent warnings in ggplot
p_binned_plot <- p_binned %>%
  filter(!is.na(deforestation_pct) & !is.na(bin_midpoint))

# Creating the plot
av_def_su_pre <- ggplot() + 
  
  # Local polynomial regression (inside SU) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_su == 1), 
    aes(x = near_su, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Local polynomial regression (outside SU) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_su == 0), 
    aes(x = near_su, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Vertical line at SU boundary (x = 0)
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Horizontal line at 0 deforestation
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Color palette for years
  scale_color_manual(values = c(  
    "2004" = "#D0E1F2",  
    "2005" = "#D0E1F2",  
    "2006" = "#79A8D1",  
    "2007" = "#79A8D1",  
    "2008" = "#79A8D1",  
    "2009" = "#082A69",  
    "2010" = "#082A69",  
    "2011" = "#082A69"  
  )) +  
  
  # Labels
  labs(title = "",
       x = "",
       y = "",
       color = "Year") +  
  
  # Theme customization
  theme_minimal(base_size = 18) +  
  theme(
    legend.position = "none",
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 14),
    axis.title = element_text(size = 22, face = "bold"),
    axis.text = element_text(size = 25),
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, face = "italic", hjust = 0.5),
    panel.grid.major = element_line(color = "white", size = 0.5),
    panel.grid.minor = element_line(color = "white", size = 0.3),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.line = element_line(color = "black", size = 1)
  ) +  
  
  # Zoom in safely without dropping points
  coord_cartesian(
    xlim = c(-10000, 12000),  # in meters
    ylim = c(-0.1, 1.8)
  ) +  
  
  # Adjust axis scales
  scale_x_continuous(
    breaks = seq(-10000, 12000, by = 5000),  # every 5 km
    labels = scales::number_format(scale = 0.001),  # convert to km
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 2.5, by = 0.5),
    expand = c(0, 0.1)
  )

av_def_su_pre


###### Filtering and preparing raw data - Post-Forest Code - 2012-2022
p <- df_pas %>%
  
  filter(year >= 2012 & year <= 2022) %>%
  # For Arc of Deforestation, also use: # filter(sigla_uf %in% c("PA", "MT", "RO", "AC"))
  # For Non-Arc of Deforestation, also use: filter(!sigla_uf %in% c("PA", "MT", "RO", "AC"))
  
  # Group by unique spatial unit (near_pa_FID) and year
  group_by(near_pa_FID, year) %>%
  
  #Keep only grids in SPs homologated by the current year and remove grids with no homologation year (NA) 
  subset(!is.na(near_pa_ano) & year >= near_pa_ano) %>%
  
  # Keep only observations within -12km to +12km from SU boundary
  filter(near_su >= -12000 & near_su <= 12000) %>%
  
  # Set deforestation_pct to NA for other protected areas (IT and SP)
  mutate(deforestation_pct = ifelse(grid_is_it == 1 & grid_is_su == 1, NA, deforestation_pct))

# Preparing binned data for plotting
p_binned <- p %>% 
  # Creating 500-meter bins along 'near_su' distance
  mutate(bin = cut(near_su, breaks = seq(-12000, 12000, by = 500), include.lowest = TRUE)) %>%
  
  # Aggregating by bin, year, and whether grid is inside SU
  group_by(bin, year, grid_is_su) %>%
  summarise(
    near_su = mean(near_su, na.rm = TRUE),  # Average distance per bin
    deforestation_pct = mean(deforestation_pct, na.rm = TRUE), # Average deforestation per bin
    .groups = "drop"
  ) %>%
  # Computing midpoint of each bin for plotting
  mutate(
    bin_midpoint = as.numeric(sub("\\[\\s*(-?\\d+),.*", "\\1", as.character(bin))) + 250)

# Filtering out NAs to prevent warnings in ggplot
p_binned_plot <- p_binned %>%
  filter(!is.na(deforestation_pct) & !is.na(bin_midpoint))

# Creating the plot
av_def_su_post <- ggplot() + 
  
  # Local polynomial regression (inside SU) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_su == 1), 
    aes(x = near_su, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Local polynomial regression (outside SU) - solid line
  geom_smooth(
    data = p_binned %>% filter(!is.na(deforestation_pct) & grid_is_su == 0), 
    aes(x = near_su, y = deforestation_pct, color = factor(year)),
    method = "lm", formula = y ~ poly(x), se = TRUE, 
    size = 1.5, linetype = "solid", fill = "gray"
  ) +  
  
  # Vertical line at SU boundary (x = 0)
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Horizontal line at 0 deforestation
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray", size = 1.2) +  
  
  # Color palette for years
  scale_color_manual(values = c(  
    "2012" = "#FAD2D2",
    "2013" = "#FAD2D2",
    "2014" = "#FAD2D2",
    "2015" = "#F27A7A",
    "2016" = "#F27A7A",
    "2017" = "#F27A7A",
    "2018" = "#F26565",
    "2019" = "#B30000",
    "2020" = "#B30000",
    "2021" = "#800000",
    "2022" = "#800000" 
  )) +  
  
  # Labels
  labs(title = "",
       x = "",
       y = "",
       color = "Year") +  
  
  # Theme customization
  theme_minimal(base_size = 18) +  
  theme(
    legend.position = "none",
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 14),
    axis.title = element_text(size = 22, face = "bold"),
    axis.text = element_text(size = 25),
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, face = "italic", hjust = 0.5),
    panel.grid.major = element_line(color = "white", size = 0.5),
    panel.grid.minor = element_line(color = "white", size = 0.3),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.line = element_line(color = "black", size = 1)
  ) +  
  
  # Zoom in safely without dropping points
  coord_cartesian(
    xlim = c(-10000, 12000),  # in meters
    ylim = c(-0.1, 1.8)
  ) +  
  
  # Adjust axis scales
  scale_x_continuous(
    breaks = seq(-10000, 12000, by = 5000),  # every 5 km
    labels = scales::number_format(scale = 0.001),  # convert to km
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 2.5, by = 0.5),
    expand = c(0, 0.1)
  )

av_def_su_post

#------------------------------------------------------------------------------#
### Robustness Check 1 - Different Bandwidths ---- Figure S13 ----
#-------------------------------------------------------------------------------
bandwidths <- c(3000, 6000, 9000, 12000, 15000, 18000, 20000)

# Subsetting the data
p <- df_pas %>%
  filter(near_su >= -20000 & near_su <= 20000) %>%
  group_by(near_pa_FID, year)

# Initialize list to store RDD models
mod_list_all <- list()

# Loop over years
for (bw in bandwidths) {
  mod_list <- list()
  
  for (i in 2004:2022) {
    p_i <- subset(p, year == i)
    # Remove grids that are inside any SUs that was homologated after the current year (future SUs)
    p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
    
    # Exclude other protected areas from analysis (IT and SP)
    p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_sp == 1, NA, p_i$deforestation_pct)
    
    # Outcome and running variable
    Y <- p_i$deforestation_pct
    X <- p_i$near_su
    
    valid <- !is.na(Y) & !is.na(X)
    Y <- Y[valid]
    X <- X[valid]
    cluster <- p_i$near_pa_FID[valid]
    
    # Only run if both sides of cutoff have variation
    if (length(unique(X[X < 0])) > 1 & length(unique(X[X > 0])) > 1) {
      mod_list[[as.character(i)]] <- rdrobust(
        Y, X, c = 0, kernel = "triangular", p = 1,
        h = bw, vce = "nn", cluster = cluster
      )
    } else {
      mod_list[[as.character(i)]] <- NULL
    }
  }
  mod_list_all[[as.character(bw)]] <- mod_list
}

# Initialize an empty list to store results from each year
dat_rdd_all <- list()

for (bw in names(mod_list_all)) {
  mod_list <- mod_list_all[[bw]]
  dat_rdd <- list()
  
  for (year in names(mod_list)) {
    mod <- mod_list[[year]]
    if (!is.null(mod)) {
      coef_i <- mod$coef
      se_i <- mod$se
      dat_i <- data.frame(
        Estimate = coef_i[3],  # Robust estimate
        S.E. = se_i[3],        # Robust standard error
        Year = as.numeric(year),
        Bandwidth = as.numeric(bw)
      )
      dat_rdd[[year]] <- dat_i
    }
  }
  dat_rdd_all[[bw]] <- do.call(rbind, dat_rdd)
}

# Combine all into one dataframe
plot <- do.call(rbind, dat_rdd_all)

# Compute 95% confidence interval bounds
plot$lower <- plot$Estimate - 1.96 * plot$S.E.
plot$upper <- plot$Estimate + 1.96 * plot$S.E.

# Compute 90% confidence interval bounds
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E.
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E.

# Make Year a factor if needed for plotting
plot$Year <- factor(plot$Year, levels = sort(unique(plot$Year)))

plot_bw_su <- ggplot(plot, aes(x = Year, y = Estimate, color = as.factor(Bandwidth))) + 
  geom_hline(yintercept = 0, color = "black", size = 1) + 
  geom_vline(xintercept = c("2012", "2019"), color = "black", linetype = "dashed", size = 1) + 
  geom_point(size = 2, position = position_dodge(width = 0.5)) + 
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, size = 0.8,
                position = position_dodge(width = 0.5)) + 
  geom_errorbar(aes(ymin = lower1, ymax = upper1), width = 0.2, size = 1.5, alpha = 0.6,
                position = position_dodge(width = 0.5)) + 
  scale_y_continuous(
    breaks = seq(-2.5, 0.5, by = 0.5),
    limits = c(-2.5, 0.5)
  ) + 
  scale_x_discrete(drop = FALSE) + 
  labs(
    x = "Year",
    y = "RDD Estimate",
    title = "",
    color = "Bandwidth"
  ) + 
  theme_minimal() + 
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 18),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.position = "bottom"
  ) + 
  annotate("text", x = "2012", y = -1, label = "Forest Code", angle = 90, vjust = -0.5, size = 5) +
  annotate("text", x = "2019", y = -1, label = "Bolsonaro Elected", angle = 90, vjust = -0.5, size = 5)

print(plot_bw_su)

#------------------------------------------------------------------------------#
### Robustness Check 2 - Balance Checks ---- Figure S16 ----
#-------------------------------------------------------------------------------
# Only keep grid cells within ±12 km of the IT border (homologated)
balance_data <- subset(
    df_pas,
    near_su >= -12000 & near_su <= 12000 &   
      !is.na(near_pa_ano) &             
      year >= near_pa_ano  
  )

# List of covariates to plot with their labels
vars_to_plot <- list(
  elevation = "Elevation",
  hillshade = "Hillshade",
  slope = "Slope",
  near_rivers = "Rivers",
  near_roads_05 = "Roads",
  near_cities_FID = "Cities"
)

# Generating plots
plot_rdd <- function(var_name, y_label) {
  # Replace values inside other PAs with NA
  balance_data[[var_name]] <- ifelse(balance_data$grid_is_it == 1 & 
                                       balance_data$grid_is_sp == 1, NA, balance_data[[var_name]])
  
  # Generate rdplot
  p <- rdplot(balance_data[[var_name]], balance_data$near_su,
              c = 0, p = 1, kernel = "tri", nbins = 300,
              x.label = "Distance to SP Border (m)",
              y.label = NULL,
              title = y_label)$rdplot
  
  # Apply a larger, publication-quality theme
  p + theme(
    axis.title.x = element_text(size = 12),
    axis.title.y = element_blank(),
    axis.text.x  = element_text(size = 12),
    axis.text.y  = element_text(size = 12),
    plot.title   = element_text(size = 12, face = "bold", hjust = 0.5),
    plot.margin  = margin(t = 10, r = 15, b = 10, l = 15)
  )
}

# Loop through variables and generate plots
plots <- lapply(names(vars_to_plot), function(v) plot_rdd(v, vars_to_plot[[v]]))

# Combining all plots into one figure
combined_plot_su <- do.call(grid.arrange, c(plots, ncol = 2))

#------------------------------------------------------------------------------#
### Robustness Check 3 - Placebo Threshold Tests ---- Figure S17 - Bottom ----
#-------------------------------------------------------------------------------
# Preparing the data
p <- df_pas %>%
  filter(near_su >= -12000 & near_su <= 12000) %>%
  group_by(near_pa_FID, year)

# Placebo offsets in meters
placebo_offsets <- c(-5000, -2000, 2000, 5000)

dat_rdd_all <- list()
mod_placebo <- list()

for(offset in placebo_offsets){
  mod_list <- list()
  
  for(i in 2004:2022){
    p_i <- subset(p, year == i)
    
    p_i <- subset(p_i, !is.na(near_pa_ano) & year >= near_pa_ano)
    
    # Exclude other protected areas from analysis (IT and SU)
    p_i$deforestation_pct <- ifelse(p_i$grid_is_it == 1 & p_i$grid_is_sp == 1, NA, p_i$deforestation_pct)
    
    Y <- p_i$deforestation_pct
    X <- p_i$near_su
    
    mod_list[[i - 2003]] <- rdrobust(
      Y, X, c = offset, kernel = "triangular", p = 1,
      bwselect = "mserd", vce = "nn", cluster = p_i$near_pa_FID
    )
  }
  
  # Collect results
  dat_rdd <- list()
  for(j in 1:length(mod_list)){
    coef_i <- mod_list[[j]]$coef[1]
    se_i   <- mod_list[[j]]$se[1]
    dat_rdd[[j]] <- data.frame(
      Estimate = coef_i,
      S.E = se_i,
      Year = 2003 + j,
      Bandwidth = paste0(ifelse(offset>0,"+", ""), offset/1000, "km")
      
    )
  }
  
  dat_rdd_all[[paste0(offset)]] <- rbindlist(dat_rdd)
}

plot <- do.call(rbind, dat_rdd_all)

# Compute confidence intervals
plot$lower  <- plot$Estimate - 1.96 * plot$S.E
plot$upper  <- plot$Estimate + 1.96 * plot$S.E
plot$lower1 <- plot$Estimate - 1.645 * plot$S.E
plot$upper1 <- plot$Estimate + 1.645 * plot$S.E

# Make Year a factor
plot$Year <- factor(plot$Year, levels = sort(unique(plot$Year)))

# Plot
plot_bw_placebo_su <- ggplot(plot, aes(x = Year, y = Estimate, color = Bandwidth)) +
  geom_hline(yintercept = 0, color = "black", size = 1) +
  geom_vline(xintercept = c("2012", "2019"), color = "black", linetype = "dashed", size = 1) +
  geom_point(size = 2, position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, size = 0.8,
                position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = lower1, ymax = upper1), width = 0.2, size = 1.5, alpha = 0.6,
                position = position_dodge(width = 0.5)) +
  scale_y_continuous(
    breaks = seq(-2.5, 1.5, by = 0.5),
    limits = c(-2.5, 1.5)
  ) +
  scale_x_discrete(drop = FALSE) +
  labs(
    x = "Year",
    y = "RDD Estimate",
    color = "Threshold Distance"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 18),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.position = "bottom"
  ) +
  annotate("text", x = "2012", y = -1.5, label = "Forest Code", angle = 90, vjust = -0.5, size = 4) +
  annotate("text", x = "2019", y = -1.5, label = "Bolsonaro Elected", angle = 90, vjust = -0.5, size = 4)

print(plot_bw_placebo_su)

